diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index bbe7fb0697..61d598c971 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -530,6 +530,7 @@
     system.registerGenericUDTF("posexplode", GenericUDTFPosExplode.class);
     system.registerGenericUDTF("stack", GenericUDTFStack.class);
     system.registerGenericUDTF("get_splits", GenericUDTFGetSplits.class);
+    system.registerGenericUDTF("get_schema", GenericUDTFGetSchema.class);
 
     //PTF declarations
     system.registerGenericUDF(LEAD_FUNC_NAME, GenericUDFLead.class);
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSchema.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSchema.java
new file mode 100644
index 0000000000..7db5ce0df8
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSchema.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.ParseException;
+import org.apache.hadoop.hive.ql.parse.ParseUtils;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.udf.UDFType;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.mapred.JobConf;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * GenericUDTFGetSchema.
+ */
+@Description(name = "get_schema", value = "_FUNC_(string) - "
+    + "Takes a query as its argument. Returns the schema (column names and types) of the result set "
+    + "that would be generated when the query is executed. "
+    + "Can be invoked like: select get_schema(\"select * from some_table\"). "
+    + "NOTE: This does not produce any output for DDL queries like show tables/databases/... and others.")
+@UDFType(deterministic = false)
+public class GenericUDTFGetSchema extends GenericUDTF {
+  private static final Logger LOG = LoggerFactory.getLogger(GenericUDTFGetSchema.class);
+
+  protected transient StringObjectInspector stringOI;
+  protected transient JobConf jc;
+
+  private final transient Object[] nameTypePair = new Object[2];
+
+  @Override
+  public void process(Object[] arguments) throws HiveException {
+
+    String query = stringOI.getPrimitiveJavaObject(arguments[0]);
+    LOG.debug("Getting schema for Query: {}", query);
+    HiveConf conf = new HiveConf(SessionState.get().getConf());
+    List<FieldSchema> fieldSchemas = null;
+    try {
+      fieldSchemas = ParseUtils.parseQueryAndGetSchema(conf, query);
+    } catch (IOException | ParseException e) {
+      throw new HiveException(e);
+    }
+
+    if (fieldSchemas != null) {
+      for (FieldSchema fieldSchema : fieldSchemas) {
+        nameTypePair[0] = fieldSchema.getName().getBytes();
+        nameTypePair[1] = fieldSchema.getType().getBytes();
+        forward(nameTypePair);
+      }
+    }
+  }
+
+  @Override
+  public StructObjectInspector initialize(ObjectInspector[] arguments)
+      throws UDFArgumentException {
+
+    LOG.debug("initializing GenericUDTFGetSchema");
+
+    if (SessionState.get() == null || SessionState.get().getConf() == null) {
+      throw new IllegalStateException("Cannot run get_schema outside HS2");
+    }
+    LOG.debug("Initialized conf, jc and metastore connection");
+
+    if (arguments.length != 1) {
+      throw new UDFArgumentLengthException(
+          "The function GET_SCHEMA accepts 1 argument.");
+    } else if (!(arguments[0] instanceof StringObjectInspector)) {
+      LOG.error("Got " + arguments[0].getTypeName() + " instead of string.");
+      throw new UDFArgumentTypeException(0, "\"string\" is expected at function GET_SCHEMA, "
+          + "but \"" + arguments[0].getTypeName() + "\" is found");
+    }
+
+    stringOI = (StringObjectInspector) arguments[0];
+
+    List<String> names = Arrays.asList("col_name", "col_type");
+    List<ObjectInspector> fieldOIs = Arrays.asList(
+        PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector,
+        PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector);
+    StructObjectInspector outputOI = ObjectInspectorFactory
+        .getStandardStructObjectInspector(names, fieldOIs);
+
+    LOG.debug("done initializing GenericUDTFGetSchema");
+    return outputOI;
+  }
+
+  @Override
+  public void close() throws HiveException {
+  }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDTFGetSchema.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDTFGetSchema.java
new file mode 100644
index 0000000000..f3758cc218
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDTFGetSchema.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import static org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class TestGenericUDTFGetSchema {
+
+  private static SessionState sessionState;
+
+  @BeforeClass
+  public static void setUpBeforeClass() throws Exception {
+    HiveConf conf = new HiveConf();
+    conf.set("hive.security.authorization.manager",
+        "org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider");
+    sessionState = SessionState.start(conf);
+  }
+
+  @AfterClass
+  public static void tearDownAfterClass() throws Exception {
+    SessionState.endStart(sessionState);
+  }
+
+  @Test
+  public void testWithComplexTypes() throws Exception {
+    invokeUDTFAndTest("select array('val1','val2') c1,"
+            + " named_struct('a',1,'b','2') c2,"
+            + " array(array(1)) c3,"
+            + " array(named_struct('a',1,'b','2')) c4,"
+            + " map(1,1) c5",
+        new String[]{"c1", "array<string>",
+            "c2", "struct<a:int,b:string>",
+            "c3", "array<array<int>>",
+            "c4", "array<struct<a:int,b:string>>",
+            "c5", "map<int,int>"
+        });
+  }
+
+  @Test
+  public void testWithSimpleTypes() throws Exception {
+    invokeUDTFAndTest("select 1 as c1, 'Happy Valentines Day' as c2, 2.2 as c3, cast(2.2 as float) c4, "
+            + "cast(2.2 as double) c5, "
+            + "cast('2019-02-14' as date) c6",
+        new String[]{"c1", "int",
+            "c2", "string",
+            "c3", "decimal(2,1)",
+            "c4", "float",
+            "c5", "double",
+            "c6", "date"
+        });
+  }
+
+  @Test
+  public void testWithDDL() throws Exception {
+    invokeUDTFAndTest("show tables", new String[]{});
+  }
+
+  private void invokeUDTFAndTest(String query, String[] expected) throws HiveException {
+
+    GenericUDTFGetSchema genericUDTFGetSchema = new GenericUDTFGetSchema();
+    List<String> actual = new ArrayList<>();
+    genericUDTFGetSchema.collector = input -> {
+      if (input != null) {
+        Object[] udfOutput = (Object[]) input;
+        actual.add(new String((byte[]) udfOutput[0]));
+        actual.add(new String((byte[]) udfOutput[1]));
+      }
+    };
+
+    genericUDTFGetSchema.initialize(new ObjectInspector[]{javaStringObjectInspector});
+    genericUDTFGetSchema.process(new Object[]{query});
+
+    assertEquals(expected.length, actual.size());
+    assertTrue("Failed for query: " + query + ". Expected: " + Arrays.toString(expected)
+        + ". Actual: " + actual, Arrays.equals(expected, actual.toArray()));
+  }
+
+}
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/show_functions.q.out ql/src/test/results/clientpositive/show_functions.q.out
index 0fdcbda66f..cc194c5771 100644
--- ql/src/test/results/clientpositive/show_functions.q.out
+++ ql/src/test/results/clientpositive/show_functions.q.out
@@ -109,6 +109,7 @@ format_number
 from_unixtime
 from_utc_timestamp
 get_json_object
+get_schema
 get_splits
 greatest
 grouping
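
Usage sketch (reviewer note, not part of the patch): with this change applied and a HiveServer2 instance running, the new get_schema UDTF can be exercised from any JDBC client. In the minimal sketch below, the JDBC URL, credentials, the table some_table, and the GetSchemaExample class are placeholder assumptions, not values taken from this patch; only the invocation shape (select get_schema('...')) and the two output columns (col_name and col_type, forwarded as byte arrays) come from the UDTF added above.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

// Hypothetical client; assumes the Hive JDBC driver is on the classpath and
// that a table named some_table exists in the default database.
public class GetSchemaExample {
  public static void main(String[] args) throws Exception {
    String url = "jdbc:hive2://localhost:10000/default";   // placeholder HS2 endpoint
    try (Connection conn = DriverManager.getConnection(url, "hive", "");
         Statement stmt = conn.createStatement();
         // The UDTF emits one (col_name, col_type) row per column of the inner query's result set.
         ResultSet rs = stmt.executeQuery("select get_schema('select * from some_table')")) {
      while (rs.next()) {
        // The output columns are declared with javaByteArrayObjectInspector, so read them as bytes.
        String name = new String(rs.getBytes(1));
        String type = new String(rs.getBytes(2));
        System.out.println(name + "\t" + type);
      }
    }
  }
}

Passing a DDL statement such as show tables to get_schema would return no rows, which is what testWithDDL in the new test asserts.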