diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index a854f9f..3b3b4e3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -467,6 +467,7 @@ // Generic UDTF's system.registerGenericUDTF("explode", GenericUDTFExplode.class); + system.registerGenericUDTF("explodeByNumber", GenericUDTFExplodeByNumber.class); system.registerGenericUDTF("inline", GenericUDTFInline.class); system.registerGenericUDTF("json_tuple", GenericUDTFJSONTuple.class); system.registerGenericUDTF("parse_url_tuple", GenericUDTFParseUrlTuple.class); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplodeByNumber.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplodeByNumber.java new file mode 100644 index 0000000..f152db0 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplodeByNumber.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+
+/**
+ * GenericUDTFExplodeByNumber: a UDTF that emits each input row n times, where
+ * n is the bigint value of the first argument. Every argument, including the
+ * count itself, is passed through unchanged to the output columns (col0..colN).
+ */
+@Description(name = "explodeByNumber", value = "_FUNC_(n, cols...) - turns 1 row into n rows")
+public class GenericUDTFExplodeByNumber extends GenericUDTF {
+
+  // Inspectors for all call arguments; element 0 is the bigint row-count inspector.
+  private transient List<ObjectInspector> argOIs = new ArrayList<ObjectInspector>();
+
+  /**
+   * Validates the arguments and builds the output row schema.
+   *
+   * @param args inspectors for the call arguments; args[0] must be a bigint
+   * @return a struct inspector with one column ("colN") per argument, in order
+   * @throws UDFArgumentException if fewer than two arguments are supplied, or
+   *         the first argument is not a long
+   */
+  @Override
+  public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
+    if (args.length < 2) {
+      throw new UDFArgumentException("UDTFExplodeByNumber() expects at least two arguments.");
+    }
+    if (!(args[0] instanceof LongObjectInspector)) {
+      throw new UDFArgumentException(
+          "The first argument to UDTFExplodeByNumber() must be a long (got "
+          + args[0].getTypeName() + " instead).");
+    }
+
+    List<String> fieldNames = new ArrayList<String>();
+    List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
+    for (int index = 0; index < args.length; ++index) {
+      fieldNames.add("col" + index);
+      fieldOIs.add(args[index]);
+    }
+    argOIs = fieldOIs;
+    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
+  }
+
+  /**
+   * Forwards the input row numRows times, where numRows is read from the
+   * first argument via the inspector captured in initialize().
+   *
+   * @param args the raw argument values for the current input row
+   * @throws HiveException if the first argument is less than 1
+   */
+  @Override
+  public void process(Object[] args) throws HiveException {
+
+    long numRows = ((LongObjectInspector) argOIs.get(0)).get(args[0]);
+
+    if (numRows < 1) {
+      throw new UDFArgumentException("UDTFExplodeByNumber() expects its first argument to be >= 1.");
+    }
+
+    for (long n = 0; n < numRows; n++) {
+      forward(args);
+    }
+  }
+
+  @Override
+  public void close() throws HiveException {
+    // Nothing to release: all rows are forwarded eagerly in process().
+  }
+
+  @Override
+  public String toString() {
+    return "UDTFExplodeByNumber";
+  }
+
+}
diff --git a/ql/src/test/queries/clientpositive/udtf_explode_number.q b/ql/src/test/queries/clientpositive/udtf_explode_number.q
new file mode 100644
index 0000000..a2b89b5
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udtf_explode_number.q
@@ -0,0 +1,11 @@
+set hive.mapred.mode=nonstrict;
+set hive.cbo.enable=false;
+
+DESCRIBE FUNCTION explodeByNumber;
+DESCRIBE FUNCTION EXTENDED explodeByNumber;
+
+create table t (x bigint, y string);
+
+insert into table t values (3,'2');
+
+SELECT explodeByNumber(x,y) FROM t;
diff --git
a/ql/src/test/results/clientpositive/udtf_explode_number.q.out b/ql/src/test/results/clientpositive/udtf_explode_number.q.out new file mode 100644 index 0000000..3ac7033 --- /dev/null +++ b/ql/src/test/results/clientpositive/udtf_explode_number.q.out @@ -0,0 +1,40 @@ +PREHOOK: query: DESCRIBE FUNCTION explodeByNumber +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION explodeByNumber +POSTHOOK: type: DESCFUNCTION +explodeByNumber(n, cols...) - turns 1 row into n rows +PREHOOK: query: DESCRIBE FUNCTION EXTENDED explodeByNumber +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED explodeByNumber +POSTHOOK: type: DESCFUNCTION +explodeByNumber(n, cols...) - turns 1 row into n rows +Synonyms: explodebynumber +PREHOOK: query: create table t (x bigint, y string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t (x bigint, y string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into table t values (3,'2') +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@t +POSTHOOK: query: insert into table t values (3,'2') +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.y SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: SELECT explodeByNumber(x,y) FROM t +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT explodeByNumber(x,y) FROM t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +3 2 +3 2 +3 2