Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnInfo.java
@@ -20,8 +20,10 @@
 
 import java.io.Serializable;
 
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
 /**
  * Implementation for ColumnInfo which contains the internal name for the column
@@ -48,7 +50,7 @@
    */
  private boolean isVirtualCol;
 
-  private transient TypeInfo type;
+  private transient ObjectInspector objectInspector;
 
  private boolean isHiddenVirtualCol;
 
@@ -69,8 +71,22 @@
 
  public ColumnInfo(String internalName, TypeInfo type, String tabAlias,
      boolean isVirtualCol, boolean isHiddenVirtualCol) {
+    this(internalName,
+        TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(type),
+        tabAlias,
+        isVirtualCol,
+        isHiddenVirtualCol);
+  }
+
+  public ColumnInfo(String internalName, ObjectInspector objectInspector,
+      String tabAlias, boolean isVirtualCol) {
+    this(internalName, objectInspector, tabAlias, isVirtualCol, false);
+  }
+
+  public ColumnInfo(String internalName, ObjectInspector objectInspector,
+      String tabAlias, boolean isVirtualCol, boolean isHiddenVirtualCol) {
    this.internalName = internalName;
-    this.type = type;
+    this.objectInspector = objectInspector;
    this.tabAlias = tabAlias;
    this.isVirtualCol = isVirtualCol;
    this.isHiddenVirtualCol = isHiddenVirtualCol;
@@ -77,7 +93,11 @@
  }
 
  public TypeInfo getType() {
-    return type;
+    return TypeInfoUtils.getTypeInfoFromObjectInspector(objectInspector);
+  }
+
+  public ObjectInspector getObjectInspector() {
+    return objectInspector;
  }
 
  public String getInternalName() {
@@ -85,7 +105,8 @@
  }
 
  public void setType(TypeInfo type) {
-    this.type = type;
+    objectInspector =
+        TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(type);
  }
 
  public void setInternalName(String internalName) {
@@ -109,7 +130,7 @@
    */
  @Override
  public String toString() {
-    return internalName + ": " + type;
+    return internalName + ": " + objectInspector.getTypeName();
  }
 
  public void setAlias(String col_alias) {
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -192,6 +192,7 @@
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFJSONTuple;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFParseUrlTuple;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFStack;
 import org.apache.hadoop.hive.ql.udf.generic.SimpleGenericUDAFParameterInfo;
 import org.apache.hadoop.hive.ql.udf.xml.GenericUDFXPath;
 import org.apache.hadoop.hive.ql.udf.xml.UDFXPathBoolean;
@@ -439,6 +440,7 @@
    registerGenericUDTF("explode", GenericUDTFExplode.class);
    registerGenericUDTF("json_tuple", GenericUDTFJSONTuple.class);
    registerGenericUDTF("parse_url_tuple", GenericUDTFParseUrlTuple.class);
+    registerGenericUDTF("stack", GenericUDTFStack.class);
  }
 
  public static void registerTemporaryUDF(String functionName,
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -2249,7 +2249,8 @@
      }
 
      out_rwsch.put(tabAlias, colAlias, new ColumnInfo(
-          getColumnInternalName(pos), exp.getTypeInfo(), tabAlias, false));
+          getColumnInternalName(pos), exp.getWritableObjectInspector(),
+          tabAlias, false));
      pos = Integer.valueOf(pos.intValue() + 1);
    }
 
@@ -4251,9 +4252,7 @@
    ObjectInspector[] colOIs = new ObjectInspector[inputCols.size()];
    for (int i = 0; i < inputCols.size(); i++) {
      colNames.add(inputCols.get(i).getInternalName());
-      colOIs[i] = TypeInfoUtils
-          .getStandardWritableObjectInspectorFromTypeInfo(inputCols.get(i)
-              .getType());
+      colOIs[i] = inputCols.get(i).getObjectInspector();
    }
 
    StructObjectInspector outputOI = genericUDTF.initialize(colOIs);
Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFStack.java
===================================================================
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFStack.java
@@ -0,0 +1,131 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantIntObjectInspector;
+import org.apache.hadoop.io.IntWritable;
+
+/**
+ * Takes a row of size k of data and splits it into n rows of data. For
+ * example, if n is 3 then the rest of the arguments are split in order into 3
+ * rows, each of which has k/3 columns in it (the first emitted row has the
+ * first k/3, the second has the second, etc). If n does not divide k then the
+ * remaining columns are padded with NULLs.
+ */
+@Description(
+    name = "stack",
+    value = "_FUNC_(n, cols...) - turns k columns into n rows of size k/n each"
+)
+public class GenericUDTFStack extends GenericUDTF {
+  @Override
+  public void close() throws HiveException {
+  }
+
+  ArrayList<ObjectInspector> argOIs = new ArrayList<ObjectInspector>();
+  Object[] forwardObj = null;
+  ArrayList<ReturnObjectInspectorResolver> returnOIResolvers =
+      new ArrayList<ReturnObjectInspectorResolver>();
+  IntWritable numRows = null;
+  Integer numCols = null;
+
+  @Override
+  public StructObjectInspector initialize(ObjectInspector[] args)
+      throws UDFArgumentException {
+    if (args.length < 2) {
+      throw new UDFArgumentException("STACK() expects at least two arguments.");
+    }
+    if (!(args[0] instanceof WritableConstantIntObjectInspector)) {
+      throw new UDFArgumentException(
+          "The first argument to STACK() must be a constant integer (got " +
+          args[0].getTypeName() + " instead).");
+    }
+    numRows =
+        ((WritableConstantIntObjectInspector)args[0]).getWritableConstantValue();
+
+    if (numRows == null || numRows.get() < 1) {
+      throw new UDFArgumentException(
+          "STACK() expects its first argument to be >= 1.");
+    }
+
+    // Divide and round up.
+    numCols = (args.length - 1 + numRows.get() - 1) / numRows.get();
+
+    for (int jj = 0; jj < numCols; ++jj) {
+      returnOIResolvers.add(new ReturnObjectInspectorResolver());
+      for (int ii = 0; ii < numRows.get(); ++ii) {
+        int index = ii * numCols + jj + 1;
+        if (index < args.length &&
+            !returnOIResolvers.get(jj).update(args[index])) {
+          throw new UDFArgumentException(
+              "Argument " + (jj + 1) + "'s type (" +
+              args[jj + 1].getTypeName() + ") should be equal to argument " +
+              index + "'s type (" + args[index].getTypeName() + ")");
+        }
+      }
+    }
+
+    forwardObj = new Object[numCols];
+    for (int ii = 0; ii < args.length; ++ii) {
+      argOIs.add(args[ii]);
+    }
+
+    ArrayList<String> fieldNames = new ArrayList<String>();
+    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
+    for (int ii = 0; ii < numCols; ++ii) {
+      fieldNames.add("col" + ii);
+      fieldOIs.add(returnOIResolvers.get(ii).get());
+    }
+
+    return ObjectInspectorFactory.getStandardStructObjectInspector(
+        fieldNames, fieldOIs);
+  }
+
+  @Override
+  public void process(Object[] args)
+      throws HiveException, UDFArgumentException {
+    for (int ii = 0; ii < numRows.get(); ++ii) {
+      for (int jj = 0; jj < numCols; ++jj) {
+        int index = ii * numCols + jj + 1;
+        if (index < args.length) {
+          forwardObj[jj] =
+              returnOIResolvers.get(jj).convertIfNecessary(args[index], argOIs.get(index));
+        } else {
+          forwardObj[jj] = null;
+        }
+      }
+      forward(forwardObj);
+    }
+  }
+
+  @Override
+  public String toString() {
+    return "stack";
+  }
+}
Index: ql/src/test/queries/clientpositive/udtf_stack.q
===================================================================
--- /dev/null
+++ ql/src/test/queries/clientpositive/udtf_stack.q
@@ -0,0 +1,7 @@
+DESCRIBE FUNCTION stack;
+
+EXPLAIN SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z') a AS x, y LIMIT 2;
+EXPLAIN SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z', array(4)) a AS x, y LIMIT 2;
+
+SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z') a AS x, y LIMIT 2;
+SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z', array(4)) a AS x, y LIMIT 2;
Index: ql/src/test/results/clientpositive/show_functions.q.out
===================================================================
--- ql/src/test/results/clientpositive/show_functions.q.out
+++ ql/src/test/results/clientpositive/show_functions.q.out
@@ -134,6 +134,7 @@
 space
 split
 sqrt
+stack
 std
 stddev
 stddev_pop
Index: ql/src/test/results/clientpositive/udtf_stack.q.out
===================================================================
--- /dev/null
+++ ql/src/test/results/clientpositive/udtf_stack.q.out
@@ -0,0 +1,171 @@
+PREHOOK: query: DESCRIBE FUNCTION stack
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION stack
+POSTHOOK: type: DESCFUNCTION
+stack(n, cols...) - turns k columns into n rows of size k/n each
+PREHOOK: query: EXPLAIN SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z') a AS x, y LIMIT 2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z') a AS x, y LIMIT 2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION STACK 2 'x' (TOK_FUNCTION array 1) 'z') x y (TOK_TABALIAS a))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x)) (TOK_SELEXPR (TOK_TABLE_OR_COL y))) (TOK_LIMIT 2)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src
+          TableScan
+            alias: src
+            Lateral View Forward
+              Select Operator
+                SELECT * : (no compute)
+                Lateral View Join Operator
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Select Operator
+                    expressions:
+                          expr: _col2
+                          type: string
+                          expr: _col3
+                          type: array<int>
+                    outputColumnNames: _col0, _col1
+                    Limit
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              Select Operator
+                expressions:
+                      expr: 2
+                      type: int
+                      expr: 'x'
+                      type: string
+                      expr: array(1)
+                      type: array<int>
+                      expr: 'z'
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3
+                UDTF Operator
+                  function name: stack
+                  Lateral View Join Operator
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Select Operator
+                      expressions:
+                            expr: _col2
+                            type: string
+                            expr: _col3
+                            type: array<int>
+                      outputColumnNames: _col0, _col1
+                      Limit
+                        File Output Operator
+                          compressed: false
+                          GlobalTableId: 0
+                          table:
+                              input format: org.apache.hadoop.mapred.TextInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 2
+
+
+PREHOOK: query: EXPLAIN SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z', array(4)) a AS x, y LIMIT 2
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z', array(4)) a AS x, y LIMIT 2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION STACK 2 'x' (TOK_FUNCTION array 1) 'z' (TOK_FUNCTION array 4)) x y (TOK_TABALIAS a))) (TOK_TABREF (TOK_TABNAME src)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x)) (TOK_SELEXPR (TOK_TABLE_OR_COL y))) (TOK_LIMIT 2)))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src
+          TableScan
+            alias: src
+            Lateral View Forward
+              Select Operator
+                SELECT * : (no compute)
+                Lateral View Join Operator
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Select Operator
+                    expressions:
+                          expr: _col2
+                          type: string
+                          expr: _col3
+                          type: array<int>
+                    outputColumnNames: _col0, _col1
+                    Limit
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 0
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              Select Operator
+                expressions:
+                      expr: 2
+                      type: int
+                      expr: 'x'
+                      type: string
+                      expr: array(1)
+                      type: array<int>
+                      expr: 'z'
+                      type: string
+                      expr: array(4)
+                      type: array<int>
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                UDTF Operator
+                  function name: stack
+                  Lateral View Join Operator
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Select Operator
+                      expressions:
+                            expr: _col2
+                            type: string
+                            expr: _col3
+                            type: array<int>
+                      outputColumnNames: _col0, _col1
+                      Limit
+                        File Output Operator
+                          compressed: false
+                          GlobalTableId: 0
+                          table:
+                              input format: org.apache.hadoop.mapred.TextInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 2
+
+
+PREHOOK: query: SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z') a AS x, y LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/71/h_j6fpg10r33hvx1lcxlgttcw61_4s/T/jonchang/hive_2011-10-19_16-00-57_244_5388245899892483193/-mr-10000
+POSTHOOK: query: SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z') a AS x, y LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/71/h_j6fpg10r33hvx1lcxlgttcw61_4s/T/jonchang/hive_2011-10-19_16-00-57_244_5388245899892483193/-mr-10000
+x	[1]
+z	null
+PREHOOK: query: SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z', array(4)) a AS x, y LIMIT 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/71/h_j6fpg10r33hvx1lcxlgttcw61_4s/T/jonchang/hive_2011-10-19_16-01-01_819_5490987258076691735/-mr-10000
+POSTHOOK: query: SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z', array(4)) a AS x, y LIMIT 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/71/h_j6fpg10r33hvx1lcxlgttcw61_4s/T/jonchang/hive_2011-10-19_16-01-01_819_5490987258076691735/-mr-10000
+x	[1]
+z	[4]
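
Illustrative usage (not part of the patch): a minimal HiveQL sketch of how the new stack() UDTF lays its trailing arguments out across rows, following the semantics described in the GenericUDTFStack Javadoc above; the literal values and the aliases a/b are hypothetical, chosen only for illustration.

    -- stack(n, v1, ..., vk) emits n rows of ceil(k/n) columns, filled row-major;
    -- cells beyond the last argument are padded with NULL, as in the udtf_stack.q tests.
    SELECT a, b
    FROM src LATERAL VIEW stack(3, 'A', 10, 'B', 20, 'C', 30) s AS a, b
    LIMIT 3;
    -- expected output (the three rows produced for the first row of src):
    --   A    10
    --   B    20
    --   C    30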