Index: ql/src/test/results/clientpositive/udtf_explode.q.out
===================================================================
--- ql/src/test/results/clientpositive/udtf_explode.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/udtf_explode.q.out	(revision 0)
@@ -0,0 +1,75 @@
+PREHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION explode (TOK_FUNCTION array 1 2 3)) myCol)) (TOK_LIMIT 3)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: [1,2,3]
+                    type: array<int>
+              outputColumnNames: _col0
+              Limit
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  directory: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/1349738108/10001
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        columns myCol
+                        serialization.format 1
+                        columns.types int
+      Needs Tagging: false
+      Path -> Alias:
+        file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/test/data/warehouse/src [src]
+      Path -> Partition:
+        file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/test/data/warehouse/src
+          Partition
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                name src
+                columns.types string:string
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                columns key,value
+                bucket_count -1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                location file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/test/data/warehouse/src
+                transient_lastDdlTime 1257229644
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            name: src
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 3
+
+
+PREHOOK: query: SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/793830062/10000
+POSTHOOK: query: SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/data/users/pyang/trunk/VENDOR.hive/trunk/build/ql/tmp/793830062/10000
+1
+2
+3
Index: ql/src/test/queries/clientpositive/udtf_explode.q
===================================================================
--- ql/src/test/queries/clientpositive/udtf_explode.q	(revision 0)
+++ ql/src/test/queries/clientpositive/udtf_explode.q	(revision 0)
@@ -0,0 +1,3 @@
+EXPLAIN EXTENDED SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3;
+
+SELECT explode(array(1,2,3)) AS myCol FROM src LIMIT 3;
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java	(revision 1928)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ColumnPrunerProcFactory.java	(working copy)
@@ -43,6 +43,7 @@
 import org.apache.hadoop.hive.ql.exec.SelectOperator;
 import org.apache.hadoop.hive.ql.exec.TableScanOperator;
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.exec.UDTFOperator;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.lib.Node;
 import org.apache.hadoop.hive.ql.lib.NodeProcessor;
@@ -253,7 +254,7 @@
       // Without this break, a bug in ReduceSink to Extract edge column pruning will manifest
       // which should be fixed before remove this
       if ((child instanceof FileSinkOperator)
-          || (child instanceof ScriptOperator)
+          || (child instanceof ScriptOperator) || (child instanceof UDTFOperator)
           || (child instanceof LimitOperator) || (child instanceof UnionOperator)) {
         cppCtx.getPrunedColLists().put(op, cppCtx.getColsFromSelectExpr(op));
         return null;
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java	(revision 1928)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionInfo.java	(working copy)
@@ -20,6 +20,7 @@
 
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
 
 public class FunctionInfo {
 
@@ -29,13 +30,18 @@
 
   private GenericUDF genericUDF;
 
+  private GenericUDTF genericUDTF;
+
   private GenericUDAFResolver genericUDAFResolver;
+
+
   public FunctionInfo(boolean isNative, String displayName, GenericUDF genericUDF) {
     this.isNative = isNative;
     this.displayName = displayName;
     this.genericUDF = genericUDF;
     this.genericUDAFResolver = null;
+    this.genericUDTF = null;
   }
 
   public FunctionInfo(boolean isNative, String displayName, GenericUDAFResolver genericUDAFResolver) {
@@ -43,8 +49,17 @@
     this.displayName = displayName;
     this.genericUDF = null;
     this.genericUDAFResolver = genericUDAFResolver;
+    this.genericUDTF = null;
   }
 
+  public FunctionInfo(boolean isNative, String displayName, GenericUDTF genericUDTF) {
+    this.isNative = isNative;
+    this.displayName = displayName;
+    this.genericUDF = null;
+    this.genericUDAFResolver = null;
+    this.genericUDTF = genericUDTF;
+  }
+
   /**
    * Get a new GenericUDF object for the function.
    */
@@ -54,6 +69,16 @@
   }
 
   /**
+   * Get a new GenericUDTF object for the function.
+   */
+  public GenericUDTF getGenericUDTF() {
+    // GenericUDTF is stateful - we have to make a copy here
+    if(genericUDTF == null)
+      return null;
+    return FunctionRegistry.cloneGenericUDTF(genericUDTF);
+  }
+
+  /**
    * Get the GenericUDAFResolver object for the function.
    */
   public GenericUDAFResolver getGenericUDAFResolver() {
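The cloning in getGenericUDTF() above deserves a word: a GenericUDTF accumulates per-query state (its operator reference via setOp, plus whatever the author stores in fields), so handing out the registry's shared instance would let state leak between queries. A minimal sketch of the hazard, assuming a hypothetical stateful UDTF; CountingUDTF and its row counter are illustrative and not part of this patch:

    import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

    // Hypothetical UDTF with per-query state. If FunctionInfo returned the
    // registry's singleton instead of a clone, 'rowsSeen' would bleed across
    // concurrent queries.
    public class CountingUDTF extends GenericUDTF {
      private long rowsSeen = 0;

      @Override
      public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        return argOIs[0]; // pass the first column's inspector through, like GenericUDTFPass
      }

      @Override
      public void process(Object[] o) throws HiveException {
        rowsSeen++;    // per-query state lives on the instance
        forward(o[0]); // emit the row; forward() routes output to the UDTFOperator
      }

      @Override
      public void close() {
        // with a fresh clone per query, rowsSeen counts only this query's rows
      }
    }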
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java	(revision 1969)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java	(working copy)
@@ -212,6 +212,12 @@
     registerGenericUDF("locate", GenericUDFLocate.class);
     registerGenericUDF("elt", GenericUDFElt.class);
     registerGenericUDF("concat_ws", GenericUDFConcatWS.class);
+    registerGenericUDF("array", GenericUDFArray.class);
+    registerGenericUDF("map", GenericUDFMap.class);
+
+    // Generic UDTF's
+    registerGenericUDTF("pass", GenericUDTFPass.class);
+    registerGenericUDTF("explode", GenericUDTFExplode.class);
   }
 
   public static void registerTemporaryUDF(String functionName, Class<? extends UDF> UDFClass,
@@ -264,6 +270,21 @@
     }
   }
 
+  static void registerGenericUDTF(String functionName, Class<? extends GenericUDTF> genericUDTFClass) {
+    registerGenericUDTF(true, functionName, genericUDTFClass);
+  }
+
+  public static void registerGenericUDTF(boolean isNative, String functionName, Class<? extends GenericUDTF> genericUDTFClass) {
+    if (GenericUDTF.class.isAssignableFrom(genericUDTFClass)) {
+      FunctionInfo fI = new FunctionInfo(isNative, functionName,
+          (GenericUDTF)ReflectionUtils.newInstance(genericUDTFClass, null));
+      mFunctions.put(functionName.toLowerCase(), fI);
+    } else {
+      throw new RuntimeException("Registering GenericUDTF Class " + genericUDTFClass
+          + " which does not extend " + GenericUDTF.class);
+    }
+  }
+
   public static FunctionInfo getFunctionInfo(String functionName) {
     return mFunctions.get(functionName.toLowerCase());
   }
@@ -626,6 +647,13 @@
   }
 
   /**
+   * Create a copy of an existing GenericUDTF.
+   */
+  public static GenericUDTF cloneGenericUDTF(GenericUDTF genericUDTF) {
+    return (GenericUDTF)ReflectionUtils.newInstance(genericUDTF.getClass(), null);
+  }
+
+  /**
    * Get the UDF class from an exprNodeDesc.
    * Returns null if the exprNodeDesc does not contain a UDF class.
    */
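A sketch of how the new registration hook might be exercised from session-setup code, reusing the hypothetical CountingUDTF from the earlier sketch; the boolean flag mirrors how temporary (non-native) UDFs are registered:

    import org.apache.hadoop.hive.ql.exec.FunctionInfo;
    import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;

    public class RegisterUdtfSketch {
      public static void main(String[] args) {
        // false = not native, the same convention temporary functions use.
        FunctionRegistry.registerGenericUDTF(false, "count_rows", CountingUDTF.class);

        // The registry lowercases names, so lookups are case-insensitive.
        FunctionInfo fi = FunctionRegistry.getFunctionInfo("COUNT_ROWS");

        // getGenericUDTF() clones, so every caller gets its own instance.
        GenericUDTF first = fi.getGenericUDTF();
        GenericUDTF second = fi.getGenericUDTF();
        System.out.println(first != second); // true: no shared per-query state
      }
    }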
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/UDTFOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/UDTFOperator.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/UDTFOperator.java	(revision 0)
@@ -0,0 +1,75 @@
+package org.apache.hadoop.hive.ql.exec;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.udtfDesc;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+public class UDTFOperator extends Operator<udtfDesc> implements Serializable {
+  private static final long serialVersionUID = 1L;
+
+  protected final Log LOG = LogFactory.getLog(this.getClass().getName());
+
+  // Single-element buffer used to forward each object output by the UDTF
+  Object [] fw = new Object[1];
+
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    conf.getUDTF().setOp(this);
+
+    // Create an array of ObjectInspectors to initialize the UDTF
+    List<? extends StructField> inputFields =
+      ((StandardStructObjectInspector)inputObjInspectors[0]).getAllStructFieldRefs();
+    ObjectInspector [] inputOIs = new ObjectInspector[inputFields.size()];
+    for(int i=0; i<inputFields.size(); i++) {
+      inputOIs[i] = inputFields.get(i).getFieldObjectInspector();
+    }
+    ObjectInspector outputOI = conf.getUDTF().initialize(inputOIs);
+
+    // The output of this operator is a struct with a single column
+    ArrayList<String> colNames = new ArrayList<String>();
+    ArrayList<ObjectInspector> colOIs = new ArrayList<ObjectInspector>();
+    colNames.add(conf.getOutputColName());
+    colOIs.add(outputOI);
+
+    outputObjInspector = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, colOIs);
+
+    // Initialize the rest of the operator DAG
+    super.initializeOp(hconf);
+  }
+
+  public void processOp(Object row, int tag) throws HiveException {
+    // Rows are passed as structs, but GenericUDTF's expect arrays of Objects.
+    // Convert the row and pass on.
+    StandardStructObjectInspector soi = (StandardStructObjectInspector) inputObjInspectors[tag];
+    List<Object> rowList = soi.getStructFieldsDataAsList(row);
+    conf.getUDTF().process(rowList.toArray());
+  }
+
+  public void forwardUDTFOutput(Object o) throws HiveException {
+    fw[0] = o;
+    forward(fw, outputObjInspector);
+  }
+
+  public String getName() {
+    return "UDTF";
+  }
+
+  public int getType() {
+    return OperatorType.UDTF;
+  }
+
+  protected void closeOp(boolean abort) throws HiveException {
+    conf.getUDTF().close();
+  }
+}
\ No newline at end of file
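The struct-to-array conversion in processOp is the one subtle step in this operator. A small standalone sketch of the same hand-off, using the standard ObjectInspector factories; the column names and values are illustrative:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

    public class StructToArrayDemo {
      public static void main(String[] args) {
        // A two-column struct OI, like the output of a Select over src(key, value).
        ArrayList<String> names = new ArrayList<String>(Arrays.asList("key", "value"));
        ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
        ois.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        ois.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        StandardStructObjectInspector soi =
            ObjectInspectorFactory.getStandardStructObjectInspector(names, ois);

        // A "row" as the standard struct OI represents it.
        List<Object> row = new ArrayList<Object>(Arrays.asList((Object) "238", "val_238"));

        // The same conversion UDTFOperator.processOp performs before process().
        Object[] udtfArgs = soi.getStructFieldsDataAsList(row).toArray();
        System.out.println(Arrays.toString(udtfArgs)); // [238, val_238]
      }
    }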
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java	(revision 1928)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java	(working copy)
@@ -21,9 +21,12 @@
 import java.util.*;
 import java.io.*;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.ql.plan.*;
 
 public class OperatorFactory {
+  static final Log LOG;
 
   public final static class opTuple<T extends Serializable> {
     public Class<T> descClass;
@@ -52,6 +55,9 @@
     opvec.add(new opTuple<limitDesc> (limitDesc.class, LimitOperator.class));
     opvec.add(new opTuple<tableScanDesc> (tableScanDesc.class, TableScanOperator.class));
     opvec.add(new opTuple<unionDesc> (unionDesc.class, UnionOperator.class));
+    opvec.add(new opTuple<udtfDesc> (udtfDesc.class, UDTFOperator.class));
+
+    LOG = LogFactory.getLog(OperatorFactory.class.getName());
   }
 
@@ -120,6 +126,7 @@
    */
   public static <T extends Serializable> Operator<T> getAndMakeChild(T conf, Operator ... oplist) {
     Operator<T> ret = get((Class <T>)conf.getClass());
+    LOG.debug("getAndMakeChild created " + ret.toString());
     ret.setConf(conf);
     if(oplist.length == 0)
       return (ret);
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java	(revision 1928)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java	(working copy)
@@ -446,7 +446,7 @@
     logStats();
     if(childOperators == null)
       return;
-
+    LOG.info("attempting to close childOperators with abort = " + (abort ? "true" : "false"));
     for(Operator<? extends Serializable> op: childOperators) {
       op.close(abort);
     }
"true" : "false")); for(Operator op: childOperators) { op.close(abort); } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/udtfDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/udtfDesc.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/udtfDesc.java (revision 0) @@ -0,0 +1,33 @@ +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; + +import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; + +public class udtfDesc implements Serializable { + private static final long serialVersionUID = 1L; + + private GenericUDTF genericUDTF; + private String outputColName; + + public udtfDesc() { } + public udtfDesc(final GenericUDTF genericUDTF, String outputColName) { + this.genericUDTF = genericUDTF; + this.outputColName = outputColName; + } + + public GenericUDTF getUDTF() { + return this.genericUDTF; + } + public void setUDTF(final GenericUDTF genericUDTF) { + this.genericUDTF=genericUDTF; + } + + public String getOutputColName() { + return this.outputColName; + } + + public void setOutputColName(String outputColName) { + this.outputColName = outputColName; + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (revision 1928) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (working copy) @@ -1015,7 +1015,7 @@ @init { msgs.push("function name"); } @after { msgs.pop(); } : // Keyword IF is also a function name - Identifier | KW_IF + Identifier | KW_IF | KW_ARRAY | KW_MAP ; castExpression Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (revision 1928) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (working copy) @@ -20,13 +20,10 @@ import java.io.Serializable; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Formatter; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -46,6 +43,7 @@ import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.ConditionalTask; import org.apache.hadoop.hive.ql.exec.ExecDriver; +import org.apache.hadoop.hive.ql.exec.FunctionInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.GroupByOperator; import org.apache.hadoop.hive.ql.exec.JoinOperator; @@ -61,10 +59,8 @@ import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; import org.apache.hadoop.hive.ql.exec.UnionOperator; -import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; -import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; import org.apache.hadoop.hive.ql.lib.Dispatcher; @@ -91,6 +87,7 @@ import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3; import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink4; +import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory.DefaultExprProcessor; import 
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(revision 1928)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(working copy)
@@ -20,13 +20,10 @@
 
 import java.io.Serializable;
 import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Formatter;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -46,6 +43,7 @@
 import org.apache.hadoop.hive.ql.exec.ColumnInfo;
 import org.apache.hadoop.hive.ql.exec.ConditionalTask;
 import org.apache.hadoop.hive.ql.exec.ExecDriver;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.GroupByOperator;
 import org.apache.hadoop.hive.ql.exec.JoinOperator;
@@ -61,10 +59,8 @@
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskFactory;
 import org.apache.hadoop.hive.ql.exec.UnionOperator;
-import org.apache.hadoop.hive.ql.exec.UDF;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
-import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
 import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
 import org.apache.hadoop.hive.ql.lib.Dispatcher;
@@ -91,6 +87,7 @@
 import org.apache.hadoop.hive.ql.optimizer.unionproc.UnionProcContext;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink3;
 import org.apache.hadoop.hive.ql.optimizer.GenMRRedSink4;
+import org.apache.hadoop.hive.ql.parse.TypeCheckProcFactory.DefaultExprProcessor;
 import org.apache.hadoop.hive.ql.plan.PlanUtils;
 import org.apache.hadoop.hive.ql.plan.aggregationDesc;
 import org.apache.hadoop.hive.ql.plan.exprNodeColumnDesc;
@@ -116,17 +113,22 @@
 import org.apache.hadoop.hive.ql.plan.selectDesc;
 import org.apache.hadoop.hive.ql.plan.tableDesc;
 import org.apache.hadoop.hive.ql.plan.tableScanDesc;
+import org.apache.hadoop.hive.ql.plan.udtfDesc;
 import org.apache.hadoop.hive.ql.plan.unionDesc;
 import org.apache.hadoop.hive.ql.ppd.PredicatePushDown;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFHash;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.Mode;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
@@ -136,19 +138,17 @@
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.TextInputFormat;
 import org.apache.hadoop.hive.serde.Constants;
-import org.apache.hadoop.hive.common.JavaUtils;
 import org.apache.hadoop.hive.ql.exec.TextRecordReader;
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
-import java.util.regex.Pattern;
 import java.util.regex.Matcher;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.mapred.SequenceFileInputFormat;
 import org.apache.hadoop.mapred.SequenceFileOutputFormat;
-import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileInputFormat;
 import org.apache.hadoop.hive.ql.io.RCFileOutputFormat;
 import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
@@ -163,7 +163,6 @@
 import org.apache.hadoop.hive.ql.exec.FetchOperator;
 import java.util.Collection;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
-import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 
 /**
  * Implementation of the semantic analyzer
@@ -1345,19 +1344,59 @@
     if (hintPresent) {
       posn++;
     }
-    
+
     boolean isInTransform = (selExprList.getChild(posn).getChild(0).getType() ==
         HiveParser.TOK_TRANSFORM);
+
     if (isInTransform) {
       trfm = (ASTNode) selExprList.getChild(posn).getChild(0);
     }
+
+    // Detect a UDTF by looking up the function name in the registry.
+    // Not as clean as TRANSFORM due to the lack of a special token.
+    boolean isUDTF = false;
+    String udtfColName = null;
+    ASTNode udtfExpr = (ASTNode) selExprList.getChild(posn).getChild(0);
+    GenericUDTF genericUDTF = null;
+    if(udtfExpr.getType() == HiveParser.TOK_FUNCTION) {
+      String funcName =
+        TypeCheckProcFactory.DefaultExprProcessor.getFunctionText(udtfExpr, true);
+      FunctionInfo fi = FunctionRegistry.getFunctionInfo(funcName);
+      genericUDTF = fi.getGenericUDTF();
+      isUDTF = (genericUDTF != null);
+    }
+
+    if(isUDTF) {
+      // Only support a single expression when it's a UDTF
+      if(selExprList.getChildCount() > 1) {
+        throw new SemanticException("Only one expression in SELECT is supported with UDTF's");
+      }
+      // Require an AS for UDTFs
+      if(((ASTNode) selExprList.getChild(posn)).getChildCount() != 2 ||
+          selExprList.getChild(posn).getChild(1).getType() != HiveParser.Identifier) {
+        throw new SemanticException("UDTF's require an AS clause");
+      }
+      udtfColName = unescapeIdentifier(selExprList.getChild(posn).getChild(1).getText());
+    }
+
     // The list of expressions after SELECT or SELECT TRANSFORM.
-    ASTNode exprList = (isInTransform ? (ASTNode) trfm.getChild(0) : selExprList);
+    ASTNode exprList;
+    if(isInTransform) {
+      exprList = (ASTNode) trfm.getChild(0);
+    } else if(isUDTF) {
+      exprList = (ASTNode) udtfExpr;
+    } else {
+      exprList = selExprList;
+    }
 
     LOG.debug("genSelectPlan: input = " + inputRR.toString());
 
     // Iterate over all expression (either after SELECT, or in SELECT TRANSFORM)
-    for (int i = posn; i < exprList.getChildCount(); ++i) {
+    // When looking at a UDTF, the function name should be skipped over, so
+    // iteration starts at posn + 1
+    for (int i = (isUDTF ? posn + 1 : posn); i < exprList.getChildCount(); ++i) {
 
       // child can be EXPR AS ALIAS, or EXPR.
       ASTNode child = (ASTNode) exprList.getChild(i);
@@ -1367,7 +1406,7 @@
       String tabAlias;
       String colAlias;
 
-      if (isInTransform) {
+      if (isInTransform || isUDTF) {
         tabAlias = null;
         colAlias = "_C" + i;
         expr = child;
@@ -1438,10 +1477,14 @@
         input), out_rwsch);
 
     output.setColumnExprMap(colExprMap);
+
     if (isInTransform) {
       output = genScriptPlan(trfm, qb, output);
     }
 
+    if(isUDTF) {
+      output = genUDTFPlan(genericUDTF, udtfColName, qb, output);
+    }
+
     LOG.debug("Created Select Plan for clause: " + dest + " row schema: "
         + out_rwsch.toString());
     return output;
@@ -2816,6 +2859,45 @@
     return limitMap;
   }
 
+  private Operator genUDTFPlan(GenericUDTF genericUDTF, String udtfColName,
+      QB qb, Operator input) throws SemanticException {
+
+    // Use the row resolver from the input operator to generate an ObjectInspector
+    // that can be used to initialize the UDTF. Then, the resulting output object
+    // inspector can be used to make the RowResolver for the UDTF operator.
+    RowResolver selectRR = opParseCtx.get(input).getRR();
+    Vector<ColumnInfo> cols = selectRR.getColumnInfos();
+
+    ArrayList<String> colNames = new ArrayList<String>();
+    ObjectInspector [] colOIs = new ObjectInspector[cols.size()];
+    for(int i=0; i<cols.size(); i++) {
+      colNames.add(cols.get(i).getInternalName());
+      colOIs[i] = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(
+          cols.get(i).getType());
+    }
+    ObjectInspector outputOI = genericUDTF.initialize(colOIs);
+
+    // The UDTF produces a single output column
+    ArrayList<ColumnInfo> outputCols = new ArrayList<ColumnInfo>();
+    outputCols.add(new ColumnInfo(udtfColName,
+        TypeInfoUtils.getTypeInfoFromObjectInspector(outputOI), null, false));
+
+    RowResolver out_rwsch = new RowResolver();
+    for(int i=0; i<outputCols.size(); i++) {
+      out_rwsch.put(null, outputCols.get(i).getInternalName(), outputCols.get(i));
+    }
+
+    // Add the UDTFOperator to the operator DAG
+    Operator udtf = putOpInsertMap(OperatorFactory.getAndMakeChild(
+        new udtfDesc(genericUDTF, udtfColName),
+        new RowSchema(out_rwsch.getColumnInfos()), input), out_rwsch);
+
+    return udtf;
+  }
+
@@ ... @@
     Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
-    StringBuilder sb = new StringBuilder();
-    Formatter fm = new Formatter(sb);
+
     opRules.put(new RuleRegExp("R1", HiveParser.TOK_NULL + "%"), TypeCheckProcFactory.getNullExprProcessor());
     opRules.put(new RuleRegExp("R2", HiveParser.Number + "%"), TypeCheckProcFactory.getNumExprProcessor());
     opRules.put(new RuleRegExp("R3", HiveParser.Identifier + "%|" +
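The key contract in genUDTFPlan is that the UDTF's initialize() returns the ObjectInspector for the single output column, which is then converted to a TypeInfo for the RowResolver. A standalone sketch of that type flow, using explode over an array of ints; the values are illustrative, and it assumes explode's initialize returns the array's element inspector, as its process() implementation implies:

    import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

    public class UdtfTypeFlowSketch {
      public static void main(String[] args) throws Exception {
        // The select list produces one column of type array<int>...
        ObjectInspector arrayOfIntOI = ObjectInspectorFactory.getStandardListObjectInspector(
            PrimitiveObjectInspectorFactory.writableIntObjectInspector);

        // ...so the UDTF is initialized with that inspector, and its return
        // value determines the output column's type, as in genUDTFPlan.
        GenericUDTF explode = new GenericUDTFExplode();
        ObjectInspector outputOI = explode.initialize(new ObjectInspector[] { arrayOfIntOI });
        TypeInfo colType = TypeInfoUtils.getTypeInfoFromObjectInspector(outputOI);
        System.out.println(colType.getTypeName()); // prints: int
      }
    }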
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMap.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMap.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFMap.java	(revision 0)
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.HashMap;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.exec.description;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.Text;
+
+@description(
+    name = "map",
+    value = "_FUNC_(key0, value0, key1, value1...) - Creates a map with the given key/value pairs "
+)
+public class GenericUDFMap extends GenericUDF {
+  Converter[] converters;
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments)
+      throws UDFArgumentException {
+
+    if (arguments.length % 2 != 0) {
+      throw new UDFArgumentLengthException(
+          "Arguments must be in key/value pairs");
+    }
+
+    GenericUDFUtils.ReturnObjectInspectorResolver keyOIResolver =
+      new GenericUDFUtils.ReturnObjectInspectorResolver(true);
+    GenericUDFUtils.ReturnObjectInspectorResolver valueOIResolver =
+      new GenericUDFUtils.ReturnObjectInspectorResolver(true);
+
+    for(int i=0; i<arguments.length; i++) {
+      if(i % 2 == 0) {
+        // Keys
+        if(!(arguments[i] instanceof PrimitiveObjectInspector)) {
+          throw new UDFArgumentTypeException(i,
+              "Primitive types are required for map keys but " +
+              arguments[i].getTypeName() + " was given");
+        }
+        if(!keyOIResolver.update(arguments[i])) {
+          throw new UDFArgumentTypeException(i,
+              "Key type \"" + arguments[i].getTypeName() +
+              "\" is different from preceding key types");
+        }
+      } else {
+        // Values
+        if(!valueOIResolver.update(arguments[i])) {
+          throw new UDFArgumentTypeException(i,
+              "Value type \"" + arguments[i].getTypeName() +
+              "\" is different from preceding value types");
+        }
+      }
+    }
+
+    ObjectInspector keyOI = keyOIResolver.get();
+    ObjectInspector valueOI = valueOIResolver.get();
+    if(keyOI == null) {
+      keyOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+    }
+    if(valueOI == null) {
+      valueOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+    }
+
+    converters = new Converter[arguments.length];
+    for(int i=0; i<arguments.length; i++) {
+      converters[i] = ObjectInspectorConverters.getConverter(arguments[i],
+          (i % 2 == 0) ? keyOI : valueOI);
+    }
+
+    return ObjectInspectorFactory.getStandardMapObjectInspector(keyOI, valueOI);
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    HashMap<Object,Object> ret = new HashMap<Object,Object>();
+    for(int i=0; i<arguments.length; i+=2) {
+      ret.put(converters[i].convert(arguments[i].get()),
+          converters[i+1].convert(arguments[i+1].get()));
+    }
+    return ret;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("map(");
+    for(int i=0; i<children.length; i++) {
+      sb.append(children[i]);
+      if(i+1 != children.length) {
+        sb.append(",");
+      }
+    }
+    sb.append(")");
+    return sb.toString();
+  }
+}
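A quick way to sanity-check GenericUDFMap outside a query, assuming GenericUDF's DeferredJavaObject wrapper is available for hand-built arguments; the keys and values are illustrative:

    import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDFMap;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

    public class MapUdfSketch {
      public static void main(String[] args) throws Exception {
        GenericUDFMap udf = new GenericUDFMap();
        ObjectInspector s = PrimitiveObjectInspectorFactory.javaStringObjectInspector;

        // Four arguments = two key/value pairs; an odd count would throw
        // UDFArgumentLengthException per initialize() above.
        ObjectInspector mapOI = udf.initialize(new ObjectInspector[] { s, s, s, s });
        System.out.println(mapOI.getTypeName()); // map<string,string>

        Object result = udf.evaluate(new GenericUDF.DeferredObject[] {
            new GenericUDF.DeferredJavaObject("a"), new GenericUDF.DeferredJavaObject("1"),
            new GenericUDF.DeferredJavaObject("b"), new GenericUDF.DeferredJavaObject("2") });
        System.out.println(result); // {b=2, a=1} (HashMap ordering)
      }
    }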
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArray.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArray.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArray.java	(revision 0)
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.exec.description;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+
+@description(
+    name = "array",
+    value = "_FUNC_(n0, n1...) - Creates an array with the given elements "
+)
+public class GenericUDFArray extends GenericUDF {
+  Converter[] converters;
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments)
+      throws UDFArgumentException {
+
+    GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver =
+      new GenericUDFUtils.ReturnObjectInspectorResolver(true);
+
+    for(int i=0; i<arguments.length; i++) {
+      if(!returnOIResolver.update(arguments[i])) {
+        throw new UDFArgumentTypeException(i,
+            "Argument type \"" + arguments[i].getTypeName() +
+            "\" is different from preceding arguments");
+      }
+    }
+
+    ObjectInspector returnOI = returnOIResolver.get();
+    if (returnOI == null) {
+      returnOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+    }
+
+    converters = new Converter[arguments.length];
+    for(int i=0; i<arguments.length; i++) {
+      converters[i] = ObjectInspectorConverters.getConverter(arguments[i], returnOI);
+    }
+
+    return ObjectInspectorFactory.getStandardListObjectInspector(returnOI);
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    ArrayList<Object> ret = new ArrayList<Object>();
+    for(int i=0; i<arguments.length; i++) {
+      ret.add(converters[i].convert(arguments[i].get()));
+    }
+    return ret;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("array(");
+    for(int i=0; i<children.length; i++) {
+      sb.append(children[i]);
+      if(i+1 != children.length) {
+        sb.append(",");
+      }
+    }
+    sb.append(")");
+    return sb.toString();
+  }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java	(revision 0)
@@ -0,0 +1,58 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+ * GenericUDTFExplode outputs a row for each element of the input array.
+ */
+public class GenericUDTFExplode extends GenericUDTF {
+
+  ListObjectInspector listOI = null;
+
+  @Override
+  public void close() {
+  }
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector [] argOIs) throws UDFArgumentException {
+    if (argOIs.length != 1) {
+      throw new UDFArgumentException("explode() takes only one argument");
+    }
+    if (argOIs[0].getCategory() != ObjectInspector.Category.LIST) {
+      throw new UDFArgumentException("explode() takes an array as a parameter");
+    }
+    listOI = (ListObjectInspector)argOIs[0];
+    return listOI.getListElementObjectInspector();
+  }
+
+  @Override
+  public void process(Object [] o) throws HiveException {
+    List<?> list = listOI.getList(o[0]);
+    for(Object r : list) {
+      this.forward(r);
+    }
+  }
+}
Index: ql/src/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java
===================================================================
--- ql/src/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java	(revision 1928)
+++ ql/src/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java	(working copy)
@@ -27,6 +27,7 @@
   public static final int FILESINK = 10;
   public static final int REDUCESINK = 11;
   public static final int UNION = 12;
+  public static final int UDTF = 13;
 
   public static final IntRangeSet VALID_VALUES = new IntRangeSet(
     JOIN,
@@ -41,7 +42,8 @@
     TABLESCAN,
     FILESINK,
     REDUCESINK,
-    UNION );
+    UNION,
+    UDTF);
 
   public static final Map<Integer, String> VALUES_TO_NAMES = new HashMap<Integer, String>() {{
     put(JOIN, "JOIN");
@@ -57,5 +59,6 @@
     put(FILESINK, "FILESINK");
     put(REDUCESINK, "REDUCESINK");
     put(UNION, "UNION");
+    put(UDTF, "UDTF");
   }};
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFPass.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFPass.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFPass.java	(revision 0)
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+
+/**
+ * GenericUDTFPass simply passes the first column through. Used for testing purposes.
+ */
+public class GenericUDTFPass extends GenericUDTF {
+
+  StandardStructObjectInspector inputOI;
+  String fieldName;
+  ObjectInspector fieldOI;
+  Object [] output = new Object[1];
+  ObjectInspector [] returnOI = new ObjectInspector[1];
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDTF#close()
+   */
+  @Override
+  public void close() {
+
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDTF#initialize(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)
+   */
+  @Override
+  public ObjectInspector initialize(ObjectInspector [] argOIs) throws UDFArgumentException {
+    return argOIs[0];
+//    inputOI = (StandardStructObjectInspector) rowOI;
+//    fieldName = inputOI.getAllStructFieldRefs().get(0).getFieldName();
+//    fieldOI = inputOI.getAllStructFieldRefs().get(0).getFieldObjectInspector();
+//
+//    ArrayList<String> fieldNames = new ArrayList<String>();
+//    fieldNames.add(fieldName);
+//    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
+//    fieldOIs.add(fieldOI);
+//
+//    outputOI = ObjectInspectorFactory.getStandardStructObjectInspector(
+//        fieldNames, fieldOIs);
+//    return outputOI;
  }
+
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDTF#process(java.lang.Object)
+   */
+  @Override
+  public void process(Object [] o) throws HiveException {
+    this.forward(o[0]);
+  }
+
+}
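Taken together, the pieces above define the authoring surface for new table functions: implement initialize/process/close, call forward() once per output row, and register the class. A closing sketch under those assumptions; GenericUDTFRange is hypothetical and not part of this patch:

    import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

    // Hypothetical: emits one row per integer in [0, n) for an int input column.
    public class GenericUDTFRange extends GenericUDTF {
      PrimitiveObjectInspector inputOI;

      @Override
      public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
        if (argOIs.length != 1 || argOIs[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
          throw new UDFArgumentException("range() takes a single primitive argument");
        }
        inputOI = (PrimitiveObjectInspector) argOIs[0];
        // One output column of ints, regardless of the input's writable/java form.
        return PrimitiveObjectInspectorFactory.javaIntObjectInspector;
      }

      @Override
      public void process(Object[] o) throws HiveException {
        int n = ((Number) inputOI.getPrimitiveJavaObject(o[0])).intValue();
        for (int i = 0; i < n; i++) {
          forward(Integer.valueOf(i)); // one output row per call, as in explode
        }
      }

      @Override
      public void close() {
      }
    }

Registered with FunctionRegistry.registerGenericUDTF("range", GenericUDTFRange.class), it would be invoked as SELECT range(n) AS i FROM t, subject to the single-expression and AS-clause checks added in SemanticAnalyzer above.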