diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFColumnNameTest.java itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFColumnNameTest.java new file mode 100644 index 0000000..19b1306 --- /dev/null +++ itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFColumnNameTest.java @@ -0,0 +1,71 @@ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +public class GenericUDAFColumnNameTest extends AbstractGenericUDAFResolver { + + @Override + public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info) + throws SemanticException { + return new GenericUDAFColumnNameEvaluator(); + } + + public static class GenericUDAFColumnNameEvaluator extends GenericUDAFEvaluator { + + private transient String colNames; + + @Override + public ObjectInspector init(Mode m, StructObjectInspector inputOI) throws HiveException { + super.init(m, inputOI); + if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) { + StringBuilder builder = new StringBuilder(); + for (StructField field : inputOI.getAllStructFieldRefs()) { + if (builder.length() > 0) { + builder.append(", "); + } + builder.append(field.getFieldName()); + } + colNames = builder.toString(); + } + return PrimitiveObjectInspectorFactory.javaStringObjectInspector; + } + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + return new DummyBuffer(); + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + } + + @Override + public void iterate(AggregationBuffer agg, 
Object[] parameters) throws HiveException { + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + return colNames; + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + if (colNames == null) { + colNames = + PrimitiveObjectInspectorFactory.javaStringObjectInspector.getPrimitiveJavaObject(partial); + } + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + return colNames; + } + } + + private static class DummyBuffer implements GenericUDAFEvaluator.AggregationBuffer { + } +} diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFColumnNameTest.java itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFColumnNameTest.java new file mode 100644 index 0000000..ab916b1 --- /dev/null +++ itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFColumnNameTest.java @@ -0,0 +1,37 @@ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +public class GenericUDFColumnNameTest extends GenericUDF { + + private String colNames; + + @Override + public ObjectInspector initialize(StructObjectInspector inputOI) + throws UDFArgumentException { + StringBuilder builder = new StringBuilder(); + for (StructField field : inputOI.getAllStructFieldRefs()) { + if (builder.length() > 0) { + builder.append(", "); + } + builder.append(field.getFieldName()); + } + colNames = builder.toString(); + return PrimitiveObjectInspectorFactory.javaStringObjectInspector; + } + + @Override + 
public Object evaluate(DeferredObject[] arguments) throws HiveException { + return colNames; + } + + @Override + public String getDisplayString(String[] children) { + return toDisplayString("dummy_udf_for_printing_col_names", children); + } +} diff --git itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFColumnNameTest.java itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFColumnNameTest.java new file mode 100644 index 0000000..ce4b3c1 --- /dev/null +++ itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFColumnNameTest.java @@ -0,0 +1,43 @@ +package org.apache.hadoop.hive.ql.udf.generic; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +public class GenericUDTFColumnNameTest extends GenericUDTF { + + private List colNames; + + @Override + public StructObjectInspector initialize(StructObjectInspector inputOI) + throws UDFArgumentException { + List colNames = new ArrayList(); + List colOIs = new ArrayList(); + for (StructField field : inputOI.getAllStructFieldRefs()) { + colNames.add(field.getFieldName()); + colOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); + } + this.colNames = colNames; + return ObjectInspectorFactory.getStandardStructObjectInspector(colNames, colOIs); + } + + @Override + public void process(Object[] args) throws HiveException { + forward(colNames); + } + + @Override + public void close() throws HiveException { + } + + public String toString() { + 
return "dummy_udtf_for_printing_col_names"; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java index bb5f4f3..ffef730 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExprNodeGenericFuncEvaluator.java @@ -18,10 +18,14 @@ package org.apache.hadoop.hive.ql.exec; +import java.util.ArrayList; +import java.util.List; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; @@ -29,7 +33,9 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /** * ExprNodeGenericFuncEvaluator. 
@@ -42,6 +48,7 @@ transient GenericUDF genericUDF; transient Object rowObject; + transient List colNames; transient ExprNodeEvaluator[] children; transient GenericUDF.DeferredObject[] deferredChildren; transient boolean isEager; @@ -102,6 +109,7 @@ public ExprNodeGenericFuncEvaluator(ExprNodeGenericFuncDesc expr) throws HiveExc } } } + colNames = ExprNodeDescUtils.recommendInputNames(expr.getChildren(), "_c"); genericUDF = expr.getGenericUDF(); if (isEager && (genericUDF instanceof GenericUDFCase || genericUDF instanceof GenericUDFWhen)) { @@ -116,15 +124,17 @@ public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveExcep deferredChildren[i] = new DeferredExprObject(children[i], isEager); } // Initialize all children first - ObjectInspector[] childrenOIs = new ObjectInspector[children.length]; - for (int i = 0; i < children.length; i++) { - childrenOIs[i] = children[i].initialize(rowInspector); + List childrenOIs = new ArrayList(); + for (ExprNodeEvaluator evaluator : children) { + childrenOIs.add(evaluator.initialize(rowInspector)); } MapredContext context = MapredContext.get(); if (context != null) { context.setup(genericUDF); } - return outputOI = genericUDF.initializeAndFoldConstants(childrenOIs); + StructObjectInspector inputOI = + ObjectInspectorFactory.getStandardStructObjectInspector(colNames, childrenOIs); + return outputOI = genericUDF.initializeAndFoldConstants(inputOI); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java index 792d87f..fa4e0e0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java @@ -86,6 +86,7 @@ protected transient ExprNodeEvaluator[][] aggregationParameterFields; protected transient ObjectInspector[][] aggregationParameterObjectInspectors; protected transient ObjectInspector[][] aggregationParameterStandardObjectInspectors; + 
protected transient String[][] aggregationParameterColNames; protected transient Object[][] aggregationParameterObjects; // so aggregationIsDistinct is a boolean array instead of a single number. protected transient boolean[] aggregationIsDistinct; @@ -269,14 +270,17 @@ protected void initializeOp(Configuration hconf) throws HiveException { // init aggregationParameterFields ArrayList aggrs = conf.getAggregators(); aggregationParameterFields = new ExprNodeEvaluator[aggrs.size()][]; + aggregationParameterColNames = new String[aggrs.size()][]; aggregationParameterObjectInspectors = new ObjectInspector[aggrs.size()][]; aggregationParameterStandardObjectInspectors = new ObjectInspector[aggrs.size()][]; aggregationParameterObjects = new Object[aggrs.size()][]; aggregationIsDistinct = new boolean[aggrs.size()]; for (int i = 0; i < aggrs.size(); i++) { AggregationDesc aggr = aggrs.get(i); + ArrayList colNames = aggr.getColNames(); ArrayList parameters = aggr.getParameters(); aggregationParameterFields[i] = new ExprNodeEvaluator[parameters.size()]; + aggregationParameterColNames[i] = new String[parameters.size()]; aggregationParameterObjectInspectors[i] = new ObjectInspector[parameters .size()]; aggregationParameterStandardObjectInspectors[i] = new ObjectInspector[parameters @@ -285,6 +289,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { for (int j = 0; j < parameters.size(); j++) { aggregationParameterFields[i][j] = ExprNodeEvaluatorFactory .get(parameters.get(j)); + aggregationParameterColNames[i][j] = colNames.get(j); aggregationParameterObjectInspectors[i][j] = aggregationParameterFields[i][j] .initialize(rowInspector); if (unionExprEval != null) { @@ -359,8 +364,12 @@ protected void initializeOp(Configuration hconf) throws HiveException { } } for (int i = 0; i < aggregationEvaluators.length; i++) { + StructObjectInspector inputOI = ObjectInspectorFactory.getStandardStructObjectInspector( + Arrays.asList(aggregationParameterColNames[i]), + 
Arrays.asList(aggregationParameterObjectInspectors[i]) + ); ObjectInspector roi = aggregationEvaluators[i].init(conf.getAggregators() - .get(i).getMode(), aggregationParameterObjectInspectors[i]); + .get(i).getMode(), inputOI); objectInspectors.add(roi); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/UDTFOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/UDTFOperator.java index afd7bcf..5a88eee 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/UDTFOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/UDTFOperator.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec; import java.io.Serializable; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -32,6 +33,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; import org.apache.hadoop.hive.ql.udf.generic.UDTFCollector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -44,7 +46,6 @@ protected final Log LOG = LogFactory.getLog(this.getClass().getName()); - StructObjectInspector udtfInputOI = null; Object[] objToSendToUDTF = null; GenericUDTF genericUDTF; @@ -63,15 +64,22 @@ protected void initializeOp(Configuration hconf) throws HiveException { genericUDTF.setCollector(collector); - udtfInputOI = (StructObjectInspector) inputObjInspectors[0]; + List inputFields = + ((StructObjectInspector) inputObjInspectors[0]).getAllStructFieldRefs(); - objToSendToUDTF = new Object[udtfInputOI.getAllStructFieldRefs().size()]; + List udtfInputOIs = new ArrayList(); + for (int i = 0; i < inputFields.size(); i++) { + udtfInputOIs.add(inputFields.get(i).getFieldObjectInspector()); + } + objToSendToUDTF = new Object[inputFields.size()]; MapredContext context = MapredContext.get(); if (context != null) { 
context.setup(genericUDTF); } - StructObjectInspector udtfOutputOI = genericUDTF.initialize(udtfInputOI); + StructObjectInspector inputOI = + ObjectInspectorFactory.getStandardStructObjectInspector(conf.getColNames(), udtfInputOIs); + StructObjectInspector udtfOutputOI = genericUDTF.initialize(inputOI); if (conf.isOuterLV()) { outerObj = Arrays.asList(new Object[udtfOutputOI.getAllStructFieldRefs().size()]); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java index d1d866f..8ce8cff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java @@ -60,6 +60,7 @@ import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowFunctionSpec; import org.apache.hadoop.hive.ql.parse.WindowingSpec.WindowSpec; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.PTFDesc; import org.apache.hadoop.hive.ql.plan.PTFDeserializer; @@ -574,23 +575,30 @@ else if (bndSpec instanceof RangeBoundarySpec) { throw new SemanticException("Unknown Boundary: " + bndSpec); } - static void setupWdwFnEvaluator(WindowFunctionDef def) throws HiveException { + void setupWdwFnEvaluator(WindowFunctionDef def) throws HiveException { List args = def.getArgs(); + + List colNames = new ArrayList(); List argOIs = new ArrayList(); - ObjectInspector[] funcArgOIs = null; if (args != null) { - for (PTFExpressionDef arg : args) { + for (int i = 0; i < args.size(); i++) { + PTFExpressionDef arg = args.get(i); argOIs.add(arg.getOI()); + String recommended = ExprNodeDescUtils.recommendTrivialInputName(arg.getExprNode()); + if (recommended == null || colNames.contains(recommended)) { + recommended = "_wc" + (i + 1); + } + colNames.add(recommended); } - funcArgOIs = new ObjectInspector[args.size()]; - 
funcArgOIs = argOIs.toArray(funcArgOIs); } GenericUDAFEvaluator wFnEval = FunctionRegistry.getGenericWindowingEvaluator(def.getName(), argOIs, def.isDistinct(), def.isStar()); - ObjectInspector OI = wFnEval.init(GenericUDAFEvaluator.Mode.COMPLETE, funcArgOIs); + StructObjectInspector inputOI = + ObjectInspectorFactory.getStandardStructObjectInspector(colNames, argOIs); + ObjectInspector OI = wFnEval.init(GenericUDAFEvaluator.Mode.COMPLETE, inputOI); def.setWFnEval(wFnEval); def.setOI(OI); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 399f92a..a2c50eb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -186,7 +186,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -3345,6 +3344,7 @@ private String recommendName(ExprNodeDesc exp, String colAlias) { * Class to store GenericUDAF related information. */ static class GenericUDAFInfo { + ArrayList parameterColNames; ArrayList convertedParameters; GenericUDAFEvaluator genericUDAFEvaluator; TypeInfo returnType; @@ -3418,7 +3418,7 @@ static GenericUDAFEvaluator getGenericUDAFEvaluator(String aggName, * @throws SemanticException * when the UDAF is not found or has problems. 
*/ - static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, + private GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, GenericUDAFEvaluator.Mode emode, ArrayList aggParameters) throws SemanticException { @@ -3428,15 +3428,16 @@ static GenericUDAFInfo getGenericUDAFInfo(GenericUDAFEvaluator evaluator, r.genericUDAFEvaluator = evaluator; // set r.returnType - ObjectInspector returnOI = null; try { ArrayList aggOIs = getWritableObjectInspector(aggParameters); - ObjectInspector[] aggOIArray = new ObjectInspector[aggOIs.size()]; - for (int ii = 0; ii < aggOIs.size(); ++ii) { - aggOIArray[ii] = aggOIs.get(ii); - } - returnOI = r.genericUDAFEvaluator.init(emode, aggOIArray); + ArrayList aggColNames = + ExprNodeDescUtils.recommendInputNames(aggParameters, autogenColAliasPrfxLbl); + + StructObjectInspector inputOI = + ObjectInspectorFactory.getStandardStructObjectInspector(aggColNames, aggOIs); + ObjectInspector returnOI = r.genericUDAFEvaluator.init(emode, inputOI); r.returnType = TypeInfoUtils.getTypeInfoFromObjectInspector(returnOI); + r.parameterColNames = aggColNames; } catch (HiveException e) { throw new SemanticException(e); } @@ -3626,7 +3627,7 @@ private Operator genGroupByPlanGroupByOperator(QBParseInfo parseInfo, GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); aggregations.add(new AggregationDesc(aggName.toLowerCase(), - udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, + udaf.genericUDAFEvaluator, udaf.parameterColNames, udaf.convertedParameters, isDistinct, amode)); String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1); @@ -3907,7 +3908,7 @@ private Operator genGroupByPlanGroupByOperator1(QBParseInfo parseInfo, GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); aggregations.add(new AggregationDesc(aggName.toLowerCase(), - udaf.genericUDAFEvaluator, udaf.convertedParameters, + udaf.genericUDAFEvaluator, 
udaf.parameterColNames, udaf.convertedParameters, (mode != GroupByDesc.Mode.FINAL && isDistinct), amode)); String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1); @@ -4075,7 +4076,7 @@ private Operator genGroupByPlanMapGroupByOperator(QB qb, GenericUDAFInfo udaf = getGenericUDAFInfo(genericUDAFEvaluator, amode, aggParameters); aggregations.add(new AggregationDesc(aggName.toLowerCase(), - udaf.genericUDAFEvaluator, udaf.convertedParameters, isDistinct, + udaf.genericUDAFEvaluator, udaf.parameterColNames, udaf.convertedParameters, isDistinct, amode)); String field = getColumnInternalName(groupByKeys.size() + aggregations.size() - 1); @@ -4609,6 +4610,7 @@ private Operator genGroupByPlanGroupByOperator2MR(QBParseInfo parseInfo, .add(new AggregationDesc( aggName.toLowerCase(), udaf.genericUDAFEvaluator, + udaf.parameterColNames, udaf.convertedParameters, (mode != GroupByDesc.Mode.FINAL && value.getToken().getType() == HiveParser.TOK_FUNCTIONDI), @@ -6214,22 +6216,21 @@ private Operator genUDTFPlan(GenericUDTF genericUDTF, // resulting output object inspector can be used to make the RowResolver // for the UDTF operator RowResolver selectRR = opParseCtx.get(input).getRowResolver(); - ArrayList inputCols = selectRR.getColumnInfos(); // Create the object inspector for the input columns and initialize the UDTF - ArrayList colNames = new ArrayList(); - ObjectInspector[] colOIs = new ObjectInspector[inputCols.size()]; - for (int i = 0; i < inputCols.size(); i++) { - colNames.add(inputCols.get(i).getInternalName()); - colOIs[i] = inputCols.get(i).getObjectInspector(); + List colNames = new ArrayList(); + List colOIs = new ArrayList(); + for (ColumnInfo column : selectRR.getColumnInfos()) { + // use alias name for field name + colNames.add(selectRR.reverseLookup(column.getInternalName())[1]); + colOIs.add(column.getObjectInspector()); } - StandardStructObjectInspector rowOI = - ObjectInspectorFactory.getStandardStructObjectInspector(colNames, 
Arrays.asList(colOIs)); - StructObjectInspector outputOI = genericUDTF.initialize(rowOI); + StructObjectInspector outputOI = genericUDTF.initialize( + ObjectInspectorFactory.getStandardStructObjectInspector(colNames, colOIs)); int numUdtfCols = outputOI.getAllStructFieldRefs().size(); if (colAliases.isEmpty()) { - // user did not specfied alias names, infer names from outputOI + // user did not specify alias names, infer names from outputOI for (StructField field : outputOI.getAllStructFieldRefs()) { colAliases.add(field.getFieldName()); } @@ -6269,7 +6270,7 @@ private Operator genUDTFPlan(GenericUDTF genericUDTF, // Add the UDTFOperator to the operator DAG Operator udtf = putOpInsertMap(OperatorFactory.getAndMakeChild( - new UDTFDesc(genericUDTF, outerLV), new RowSchema(out_rwsch.getColumnInfos()), + new UDTFDesc(genericUDTF, colNames, outerLV), new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch); return udtf; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/AggregationDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/AggregationDesc.java index 17eeae1..acee63e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/AggregationDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/AggregationDesc.java @@ -23,6 +23,8 @@ import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.ql.exec.PTFUtils; +import java.util.ArrayList; + import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; import org.apache.hadoop.util.ReflectionUtils; @@ -39,6 +41,7 @@ private static final long serialVersionUID = 1L; private String genericUDAFName; + private java.util.ArrayList colNames; private java.util.ArrayList parameters; private boolean distinct; private GenericUDAFEvaluator.Mode mode; @@ -55,9 +58,11 @@ public AggregationDesc() { public AggregationDesc(final String genericUDAFName, final GenericUDAFEvaluator genericUDAFEvaluator, + final java.util.ArrayList colNames, final java.util.ArrayList parameters, final boolean distinct, 
final GenericUDAFEvaluator.Mode mode) { this.genericUDAFName = genericUDAFName; + this.colNames = colNames; this.parameters = parameters; this.distinct = distinct; this.mode = mode; @@ -115,6 +120,14 @@ public void setGenericUDAFWritableEvaluator(GenericUDAFEvaluator genericUDAFWrit this.genericUDAFWritableEvaluator = genericUDAFWritableEvaluator; } + public ArrayList getColNames() { + return colNames; + } + + public void setColNames(ArrayList colNames) { + this.colNames = colNames; + } + public java.util.ArrayList getParameters() { return parameters; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java index f293c43..88a7b66 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeDescUtils.java @@ -164,6 +164,52 @@ public static String recommendInputName(ExprNodeDesc desc) { } /** + * Returns the column name when the expression references exactly one ExprNodeColumnDesc, or null otherwise + */ + public static String recommendTrivialInputName(ExprNodeDesc desc) { + if (desc instanceof ExprNodeColumnDesc) { + return ((ExprNodeColumnDesc)desc).getColumn(); + } + List found = new ArrayList(); + if (!findSingleColumn(desc, found) || found.isEmpty()) { + return null; + } + return found.get(0).getColumn(); + } + + private static boolean findSingleColumn(ExprNodeDesc expr, List found) { + List children = expr.getChildren(); + if (children == null || children.isEmpty()) { + if (expr instanceof ExprNodeColumnDesc) { + found.add((ExprNodeColumnDesc) expr); + } + return found.size() <= 1; + } + for (ExprNodeDesc child : children) { + if (!findSingleColumn(child, found)) { + return false; + } + } + return true; + } + + /** + * Recommend column names for input expressions. + * If there is no recommendation, generate a name by appending the provided prefix. 
+ */ + public static ArrayList recommendInputNames(List descs, String prefix) { + ArrayList colNames = new ArrayList(descs.size()); + for (int i = 0; i < descs.size(); i++) { + String recommended = recommendTrivialInputName(descs.get(i)); + if (recommended == null || colNames.contains(recommended)) { + recommended = prefix + (i + 1); + } + colNames.add(recommended); + } + return colNames; + } + + /** * Return false if the expression has any non deterministic function */ public static boolean isDeterministic(ExprNodeDesc desc) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java index 4b2c1ad..4de5d04 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeGenericFuncDesc.java @@ -38,7 +38,9 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -194,9 +196,9 @@ public ExprNodeDesc clone() { public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF, String funcText, List children) throws UDFArgumentException { - ObjectInspector[] childrenOIs = new ObjectInspector[children.size()]; - for (int i = 0; i < childrenOIs.length; i++) { - childrenOIs[i] = children.get(i).getWritableObjectInspector(); + List childrenOIs = new ArrayList(children.size()); + for (ExprNodeDesc expr : children) { + 
childrenOIs.add(expr.getWritableObjectInspector()); } // Check if a bigint is implicitely cast to a double as part of a comparison @@ -229,7 +231,11 @@ public static ExprNodeGenericFuncDesc newInstance(GenericUDF genericUDF, } } - ObjectInspector oi = genericUDF.initializeAndFoldConstants(childrenOIs); + List colNames = ExprNodeDescUtils.recommendInputNames(children, "_c"); + + StructObjectInspector inputOI = + ObjectInspectorFactory.getStandardStructObjectInspector(colNames, childrenOIs); + ObjectInspector oi = genericUDF.initializeAndFoldConstants(inputOI); String[] requiredJars = genericUDF.getRequiredJars(); String[] requiredFiles = genericUDF.getRequiredFiles(); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java index f75bec5..2e3508f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/PTFDeserializer.java @@ -194,19 +194,26 @@ protected void initialize(PartitionedTableFunctionDef def) throws HiveException static void setupWdwFnEvaluator(WindowFunctionDef def) throws HiveException { List args = def.getArgs(); + + List colNames = new ArrayList(); List argOIs = new ArrayList(); - ObjectInspector[] funcArgOIs = null; if (args != null) { - for (PTFExpressionDef arg : args) { + for (int i = 0; i < args.size(); i++) { + PTFExpressionDef arg = args.get(i); argOIs.add(arg.getOI()); + String recommended = ExprNodeDescUtils.recommendTrivialInputName(arg.getExprNode()); + if (recommended == null || colNames.contains(recommended)) { + recommended = "_wc" + (i + 1); + } + colNames.add(recommended); } - funcArgOIs = new ObjectInspector[args.size()]; - funcArgOIs = argOIs.toArray(funcArgOIs); } GenericUDAFEvaluator wFnEval = def.getWFnEval(); - ObjectInspector OI = wFnEval.init(GenericUDAFEvaluator.Mode.COMPLETE, funcArgOIs); + StructObjectInspector inputOI = + 
ObjectInspectorFactory.getStandardStructObjectInspector(colNames, argOIs); + ObjectInspector OI = wFnEval.init(GenericUDAFEvaluator.Mode.COMPLETE, inputOI); def.setWFnEval(wFnEval); def.setOI(OI); } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/UDTFDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/UDTFDesc.java index 741a0e0..1046ff0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/UDTFDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/UDTFDesc.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.ql.plan; +import java.util.List; + import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; /** @@ -31,13 +33,15 @@ private static final long serialVersionUID = 1L; private GenericUDTF genericUDTF; + private List colNames; private boolean outerLV; public UDTFDesc() { } - public UDTFDesc(final GenericUDTF genericUDTF, boolean outerLV) { + public UDTFDesc(GenericUDTF genericUDTF, List colNames, boolean outerLV) { this.genericUDTF = genericUDTF; + this.colNames = colNames; this.outerLV = outerLV; } @@ -54,6 +58,14 @@ public String getUDTFName() { return genericUDTF.toString(); } + public List getColNames() { + return colNames; + } + + public void setColNames(List colNames) { + this.colNames = colNames; + } + public boolean isOuterLV() { return outerLV; } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java index 3bd97b0..f20bee3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEvaluator.java @@ -26,6 +26,8 @@ import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /** * A 
Generic User-defined aggregation function (GenericUDAF) for the use with @@ -99,6 +101,10 @@ public GenericUDAFEvaluator() { public void configure(MapredContext mapredContext) { } + public ObjectInspector init(Mode m, StructObjectInspector parameters) throws HiveException { + return init(m, ObjectInspectorUtils.toObjectInspectorArray(parameters)); + } + /** * Initialize the evaluator. * diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java index e3fb558..acced5a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java @@ -20,6 +20,7 @@ import java.io.Closeable; import java.io.IOException; +import java.util.List; import org.apache.hadoop.hive.ql.exec.MapredContext; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -29,6 +30,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /** * A Generic User-defined function (GenericUDF) for the use with Hive. @@ -40,14 +43,14 @@ * variable length of arguments. 3. It can accept an infinite number of function * signature - for example, it's easy to write a GenericUDF that accepts * array, array> and so on (arbitrary levels of nesting). 4. It - * can do short-circuit evaluations using DeferedObject. + * can do short-circuit evaluations using DeferredObject. */ @UDFType(deterministic = true) public abstract class GenericUDF implements Closeable { /** - * A Defered Object allows us to do lazy-evaluation and short-circuiting. - * GenericUDF use DeferedObject to pass arguments. 
+ * A Deferred Object allows us to do lazy-evaluation and short-circuiting. + * GenericUDF use DeferredObject to pass arguments. */ public static interface DeferredObject { void prepare(int version) throws HiveException; @@ -81,6 +84,11 @@ public Object get() throws HiveException { public GenericUDF() { } + public ObjectInspector initialize(StructObjectInspector inputOI) + throws UDFArgumentException { + return initialize(ObjectInspectorUtils.toObjectInspectorArray(inputOI)); + } + /** * Initialize this GenericUDF. This will be called once and only once per * GenericUDF instance. @@ -91,8 +99,9 @@ public GenericUDF() { * Thrown when arguments have wrong types, wrong length, etc. * @return The ObjectInspector for the return value */ - public abstract ObjectInspector initialize(ObjectInspector[] arguments) - throws UDFArgumentException; + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + throw new UDFArgumentException("should not be called directly"); + } /** * Additionally setup GenericUDF with MapredContext before initializing. @@ -110,7 +119,7 @@ public void configure(MapredContext context) { * ConstantObjectInspector returned. Otherwise, the function behaves exactly * like initialize(). 
*/ - public ObjectInspector initializeAndFoldConstants(ObjectInspector[] arguments) + public ObjectInspector initializeAndFoldConstants(StructObjectInspector arguments) throws UDFArgumentException { ObjectInspector oi = initialize(arguments); @@ -122,9 +131,11 @@ public ObjectInspector initializeAndFoldConstants(ObjectInspector[] arguments) return oi; } + List fields = arguments.getAllStructFieldRefs(); + boolean allConstant = true; - for (int ii = 0; ii < arguments.length; ++ii) { - if (!ObjectInspectorUtils.isConstantObjectInspector(arguments[ii])) { + for (StructField field : fields) { + if (!ObjectInspectorUtils.isConstantObjectInspector(field.getFieldObjectInspector())) { allConstant = false; break; } @@ -135,11 +146,12 @@ public ObjectInspector initializeAndFoldConstants(ObjectInspector[] arguments) FunctionRegistry.isDeterministic(this) && !FunctionRegistry.isStateful(this) && ObjectInspectorUtils.supportsConstantObjectInspector(oi)) { - DeferredObject[] argumentValues = - new DeferredJavaObject[arguments.length]; - for (int ii = 0; ii < arguments.length; ++ii) { + + DeferredObject[] argumentValues = new DeferredJavaObject[fields.size()]; + for (int ii = 0; ii < argumentValues.length; ++ii) { + ObjectInspector colOI = fields.get(ii).getFieldObjectInspector(); argumentValues[ii] = new DeferredJavaObject( - ((ConstantObjectInspector)arguments[ii]).getWritableConstantValue()); + ((ConstantObjectInspector)colOI).getWritableConstantValue()); } try { Object constantValue = evaluate(argumentValues); @@ -168,7 +180,7 @@ public ObjectInspector initializeAndFoldConstants(ObjectInspector[] arguments) * Evaluate the GenericUDF with the arguments. * * @param arguments - * The arguments as DeferedObject, use DeferedObject.get() to get the + * The arguments as DeferredObject, use DeferredObject.get() to get the * actual argument Object. The Objects can be inspected by the * ObjectInspectors passed in the initialize call. 
* @return The @@ -181,6 +193,21 @@ public abstract Object evaluate(DeferredObject[] arguments) */ public abstract String getDisplayString(String[] children); + // simple utility method + protected String toDisplayString(String functionName, String[] children) { + StringBuilder sb = new StringBuilder(); + sb.append(functionName); + sb.append('('); + for (int i = 0; i < children.length; i++) { + sb.append(children[i]); + if (i + 1 != children.length) { + sb.append(','); + } + } + sb.append(')'); + return sb.toString(); + } + /** * Close GenericUDF. * This is only called in runtime of MapRedTask. diff --git ql/src/test/queries/clientpositive/udf_col_names.q ql/src/test/queries/clientpositive/udf_col_names.q new file mode 100644 index 0000000..f89ad6a --- /dev/null +++ ql/src/test/queries/clientpositive/udf_col_names.q @@ -0,0 +1,15 @@ +CREATE TEMPORARY FUNCTION col_name_udf AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFColumnNameTest'; +CREATE TEMPORARY FUNCTION col_name_udtf AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDTFColumnNameTest'; +CREATE TEMPORARY FUNCTION col_name_udaf AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFColumnNameTest'; + +explain +select col_name_udf(key, key + value, cast(value as int), value + 1) from src TABLESAMPLE (10 ROWS); +select col_name_udf(key, key + value, cast(value as int), value + 1) from src TABLESAMPLE (10 ROWS); + +explain +select col_name_udtf(key, key + value, cast(value as int), value + 1) AS (a, b, c, d) from src TABLESAMPLE (10 ROWS); +select col_name_udtf(key, key + value, cast(value as int), value + 1) AS (a, b, c, d) from src TABLESAMPLE (10 ROWS); + +explain +select col_name_udaf(key, key + value, cast(value as int), value + 1) from src TABLESAMPLE (10 ROWS) group by key; +select col_name_udaf(key, key + value, cast(value as int), value + 1) from src TABLESAMPLE (10 ROWS) group by key; diff --git ql/src/test/results/clientpositive/udf_col_names.q.out 
ql/src/test/results/clientpositive/udf_col_names.q.out new file mode 100644 index 0000000..05a37be --- /dev/null +++ ql/src/test/results/clientpositive/udf_col_names.q.out @@ -0,0 +1,206 @@ +PREHOOK: query: CREATE TEMPORARY FUNCTION col_name_udf AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFColumnNameTest' +PREHOOK: type: CREATEFUNCTION +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TEMPORARY FUNCTION col_name_udf AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDFColumnNameTest' +POSTHOOK: type: CREATEFUNCTION +POSTHOOK: Output: database:default +PREHOOK: query: CREATE TEMPORARY FUNCTION col_name_udtf AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDTFColumnNameTest' +PREHOOK: type: CREATEFUNCTION +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TEMPORARY FUNCTION col_name_udtf AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDTFColumnNameTest' +POSTHOOK: type: CREATEFUNCTION +POSTHOOK: Output: database:default +PREHOOK: query: CREATE TEMPORARY FUNCTION col_name_udaf AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFColumnNameTest' +PREHOOK: type: CREATEFUNCTION +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TEMPORARY FUNCTION col_name_udaf AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFColumnNameTest' +POSTHOOK: type: CREATEFUNCTION +POSTHOOK: Output: database:default +PREHOOK: query: explain +select col_name_udf(key, key + value, cast(value as int), value + 1) from src TABLESAMPLE (10 ROWS) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select col_name_udf(key, key + value, cast(value as int), value + 1) from src TABLESAMPLE (10 ROWS) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Row Limit Per Split: 10 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 
dummy_udf_for_printing_col_names(key,(key + value),UDFToInteger(value),(value + 1)) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select col_name_udf(key, key + value, cast(value as int), value + 1) from src TABLESAMPLE (10 ROWS) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select col_name_udf(key, key + value, cast(value as int), value + 1) from src TABLESAMPLE (10 ROWS) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +key, _c2, value, _c4 +key, _c2, value, _c4 +key, _c2, value, _c4 +key, _c2, value, _c4 +key, _c2, value, _c4 +key, _c2, value, _c4 +key, _c2, value, _c4 +key, _c2, value, _c4 +key, _c2, value, _c4 +key, _c2, value, _c4 +PREHOOK: query: explain +select col_name_udtf(key, key + value, cast(value as int), value + 1) AS (a, b, c, d) from src TABLESAMPLE (10 ROWS) +PREHOOK: type: QUERY +POSTHOOK: query: explain +select col_name_udtf(key, key + value, cast(value as int), value + 1) AS (a, b, c, d) from src TABLESAMPLE (10 ROWS) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Row Limit Per Split: 10 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), (key + value) (type: double), UDFToInteger(value) (type: int), (value + 1) 
(type: double) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + UDTF Operator + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + function name: dummy_udtf_for_printing_col_names + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select col_name_udtf(key, key + value, cast(value as int), value + 1) AS (a, b, c, d) from src TABLESAMPLE (10 ROWS) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select col_name_udtf(key, key + value, cast(value as int), value + 1) AS (a, b, c, d) from src TABLESAMPLE (10 ROWS) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +key _c2 value _c4 +key _c2 value _c4 +key _c2 value _c4 +key _c2 value _c4 +key _c2 value _c4 +key _c2 value _c4 +key _c2 value _c4 +key _c2 value _c4 +key _c2 value _c4 +key _c2 value _c4 +PREHOOK: query: explain +select col_name_udaf(key, key + value, cast(value as int), value + 1) from src TABLESAMPLE (10 ROWS) group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select col_name_udaf(key, key + value, cast(value as int), value + 1) from src TABLESAMPLE (10 ROWS) group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src + Row Limit Per Split: 10 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: 
string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: col_name_udaf(key, (key + value), UDFToInteger(value), (value + 1)) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: col_name_udaf(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select col_name_udaf(key, key + value, cast(value as int), value + 1) from src TABLESAMPLE (10 ROWS) group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select col_name_udaf(key, key + value, cast(value as int), value + 1) from src TABLESAMPLE (10 ROWS) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +key, _c2, value, _c4 +key, _c2, value, _c4 +key, _c2, 
value, _c4 +key, _c2, value, _c4 +key, _c2, value, _c4 +key, _c2, value, _c4 +key, _c2, value, _c4 +key, _c2, value, _c4 +key, _c2, value, _c4 +key, _c2, value, _c4 diff --git ql/src/test/results/compiler/plan/groupby1.q.xml ql/src/test/results/compiler/plan/groupby1.q.xml index 1f53052..1453631 100755 --- ql/src/test/results/compiler/plan/groupby1.q.xml +++ ql/src/test/results/compiler/plan/groupby1.q.xml @@ -538,6 +538,13 @@ + + + + value + + + org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum$GenericUDAFSumDouble @@ -1292,6 +1299,13 @@ + + + + VALUE._col0 + + + org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum$GenericUDAFSumDouble diff --git ql/src/test/results/compiler/plan/groupby2.q.xml ql/src/test/results/compiler/plan/groupby2.q.xml index f9e1540..5dfa6b9 100755 --- ql/src/test/results/compiler/plan/groupby2.q.xml +++ ql/src/test/results/compiler/plan/groupby2.q.xml @@ -521,6 +521,13 @@ + + + + value + + + true @@ -587,6 +594,13 @@ + + + + value + + + org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum$GenericUDAFSumDouble @@ -1478,6 +1492,13 @@ + + + + KEY._col1:0._col0 + + + true @@ -1510,6 +1531,13 @@ + + + + VALUE._col1 + + + org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum$GenericUDAFSumDouble diff --git ql/src/test/results/compiler/plan/groupby3.q.xml ql/src/test/results/compiler/plan/groupby3.q.xml index ee32e0e..7d5edf7 100644 --- ql/src/test/results/compiler/plan/groupby3.q.xml +++ ql/src/test/results/compiler/plan/groupby3.q.xml @@ -556,6 +556,13 @@ + + + + value + + + org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum$GenericUDAFSumDouble @@ -619,6 +626,13 @@ + + + + value + + + org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage$GenericUDAFAverageEvaluatorDouble @@ -680,6 +694,13 @@ + + + + value + + + true @@ -744,6 +765,13 @@ + + + + value + + + org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax$GenericUDAFMaxEvaluator @@ -805,6 +833,13 @@ + + + + value + + + 
org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin$GenericUDAFMinEvaluator @@ -1761,6 +1796,13 @@ + + + + VALUE._col0 + + + org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum$GenericUDAFSumDouble @@ -1790,6 +1832,13 @@ + + + + VALUE._col1 + + + org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage$GenericUDAFAverageEvaluatorDouble @@ -1817,6 +1866,13 @@ + + + + KEY._col0:0._col0 + + + true @@ -1849,6 +1905,13 @@ + + + + VALUE._col3 + + + org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMax$GenericUDAFMaxEvaluator @@ -1876,6 +1939,13 @@ + + + + VALUE._col4 + + + org.apache.hadoop.hive.ql.udf.generic.GenericUDAFMin$GenericUDAFMinEvaluator diff --git ql/src/test/results/compiler/plan/groupby5.q.xml ql/src/test/results/compiler/plan/groupby5.q.xml index 7a63fd1..63ee8c8 100644 --- ql/src/test/results/compiler/plan/groupby5.q.xml +++ ql/src/test/results/compiler/plan/groupby5.q.xml @@ -390,6 +390,13 @@ + + + + value + + + org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum$GenericUDAFSumDouble @@ -1167,6 +1174,13 @@ + + + + VALUE._col0 + + + org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum$GenericUDAFSumDouble diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index 1baf359..34a1332 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -69,9 +69,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.shims.ShimLoader; 
import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.util.StringUtils; @@ -108,7 +106,7 @@ public static ObjectInspector getWritableObjectInspector(ObjectInspector oi) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; if (!(poi instanceof AbstractPrimitiveWritableObjectInspector)) { return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - (PrimitiveTypeInfo)poi.getTypeInfo()); + poi.getTypeInfo()); } } return oi; @@ -1192,6 +1190,15 @@ public static boolean hasAllFieldsSettable(ObjectInspector oi, return setOISettablePropertiesMap(oi, oiSettableProperties, returnValue); } + public static ObjectInspector[] toObjectInspectorArray(StructObjectInspector structOI) { + List fields = structOI.getAllStructFieldRefs(); + ObjectInspector[] ois = new ObjectInspector[fields.size()]; + for (int i = 0; i < ois.length; i++) { + ois[i] = fields.get(i).getFieldObjectInspector(); + } + return ois; + } + private ObjectInspectorUtils() { // prevent instantiation }