diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 61d598c971..8d36c28a9c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -530,7 +530,7 @@ system.registerGenericUDTF("posexplode", GenericUDTFPosExplode.class); system.registerGenericUDTF("stack", GenericUDTFStack.class); system.registerGenericUDTF("get_splits", GenericUDTFGetSplits.class); - system.registerGenericUDTF("get_schema", GenericUDTFGetSchema.class); + system.registerGenericUDTF("get_sql_schema", GenericUDTFGetSQLSchema.class); //PTF declarations system.registerGenericUDF(LEAD_FUNC_NAME, GenericUDFLead.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSchema.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSQLSchema.java similarity index 86% rename from ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSchema.java rename to ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSQLSchema.java index 7db5ce0df8..9e48cfed67 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSchema.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSQLSchema.java @@ -43,16 +43,16 @@ import org.slf4j.LoggerFactory; /** - * GenericUDTFGetSchema. + * GenericUDTFGetSQLSchema. */ -@Description(name = "get_schema", value = "_FUNC_(string) - " +@Description(name = "get_sql_schema", value = "_FUNC_(string) - " + "Takes query as argument. Returns schema (column names and types) of the resultset " + " that would be generated when the query is executed. " + - "Can be invoked like: select get_schema(\"select * from some_table\")." + + "Can be invoked like: select get_sql_schema(\"select * from some_table\")." + "NOTE: This does not produce any output for DDL queries like show tables/databases/... and others.") @UDFType(deterministic = false) -public class GenericUDTFGetSchema extends GenericUDTF { - private static final Logger LOG = LoggerFactory.getLogger(GenericUDTFGetSchema.class); +public class GenericUDTFGetSQLSchema extends GenericUDTF { + private static final Logger LOG = LoggerFactory.getLogger(GenericUDTFGetSQLSchema.class); protected transient StringObjectInspector stringOI; protected transient JobConf jc; @@ -85,20 +85,20 @@ public void process(Object[] arguments) throws HiveException { public StructObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - LOG.debug("initializing GenericUDTFGetSchema"); + LOG.debug("initializing GenericUDTFGetSQLSchema"); if (SessionState.get() == null || SessionState.get().getConf() == null) { - throw new IllegalStateException("Cannot run get schema outside HS2"); + throw new IllegalStateException("Cannot run GET_SQL_SCHEMA outside HS2"); } LOG.debug("Initialized conf, jc and metastore connection"); if (arguments.length != 1) { throw new UDFArgumentLengthException( - "The function GET_SCHEMA accepts 1 argument."); + "The function GET_SQL_SCHEMA accepts 1 argument."); } else if (!(arguments[0] instanceof StringObjectInspector)) { LOG.error("Got " + arguments[0].getTypeName() + " instead of string."); throw new UDFArgumentTypeException(0, "\"" - + "string\" is expected at function GET_SCHEMA, " + "but \"" + + "string\" is expected at function GET_SQL_SCHEMA, " + "but \"" + arguments[0].getTypeName() + "\" is found"); } @@ -110,10 +110,14 @@ public StructObjectInspector initialize(ObjectInspector[] arguments) StructObjectInspector outputOI = ObjectInspectorFactory .getStandardStructObjectInspector(names, fieldOIs); - LOG.debug("done initializing GenericUDTFGetSchema"); + LOG.debug("done initializing GenericUDTFGetSQLSchema"); return outputOI; } + @Override + public String toString() { + return "get_sql_schema"; + } @Override public void close() throws HiveException { diff --git ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDTFGetSchema.java ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDTFGetSQLSchema.java similarity index 91% rename from ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDTFGetSchema.java rename to ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDTFGetSQLSchema.java index f3758cc218..1f6bb018c8 100644 --- ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDTFGetSchema.java +++ ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDTFGetSQLSchema.java @@ -32,7 +32,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; -public class TestGenericUDTFGetSchema { +public class TestGenericUDTFGetSQLSchema { private static SessionState sessionState; @@ -85,9 +85,9 @@ public void testWithDDL() throws Exception { private void invokeUDTFAndTest(String query, String[] expected) throws HiveException { - GenericUDTFGetSchema genericUDTFGetSchema = new GenericUDTFGetSchema(); + GenericUDTFGetSQLSchema genericUDTFGetSQLSchema = new GenericUDTFGetSQLSchema(); List actual = new ArrayList<>(); - genericUDTFGetSchema.collector = input -> { + genericUDTFGetSQLSchema.collector = input -> { if (input != null) { Object[] udfOutput = (Object[]) input; actual.add(new String((byte[]) udfOutput[0])); @@ -95,8 +95,8 @@ private void invokeUDTFAndTest(String query, String[] expected) throws HiveExcep } }; - genericUDTFGetSchema.initialize(new ObjectInspector[]{javaStringObjectInspector}); - genericUDTFGetSchema.process(new Object[]{query}); + genericUDTFGetSQLSchema.initialize(new ObjectInspector[]{javaStringObjectInspector}); + genericUDTFGetSQLSchema.process(new Object[]{query}); assertEquals(expected.length, actual.size()); assertTrue("Failed for query: " + query + ". Expected: " + Arrays.toString(expected) diff --git ql/src/test/queries/clientpositive/udtf_get_sql_schema.q ql/src/test/queries/clientpositive/udtf_get_sql_schema.q new file mode 100644 index 0000000000..b8fadd4bc2 --- /dev/null +++ ql/src/test/queries/clientpositive/udtf_get_sql_schema.q @@ -0,0 +1,17 @@ +set hive.fetch.task.conversion=more; +set hive.mapred.mode=nonstrict; + +describe function get_sql_schema; +describe function extended get_sql_schema; + +create table t1(c1 int, c2 float, c3 double, c4 string, c5 date, c6 array, c7 struct, c8 map); +insert into t1 select 1, 1.1, 2.2, 'val1', '2019-02-15', array(1), named_struct('a',1,'b','2'), map(1,1); + +explain select get_sql_schema('select * from t1'); +select get_sql_schema('select * from t1'); + +create external table t2(c1 int, c2 float, c3 double, c4 string, c5 date, c6 array, c7 struct, c8 map); +insert into t2 select 1, 1.1, 2.2, 'val1', '2019-02-15', array(1), named_struct('a',1,'b','2'), map(1,1); + +explain select get_sql_schema('select * from t2'); +select get_sql_schema('select * from t2'); diff --git ql/src/test/results/clientpositive/show_functions.q.out ql/src/test/results/clientpositive/show_functions.q.out index cc194c5771..b987fa20fc 100644 --- ql/src/test/results/clientpositive/show_functions.q.out +++ ql/src/test/results/clientpositive/show_functions.q.out @@ -109,7 +109,7 @@ format_number from_unixtime from_utc_timestamp get_json_object -get_schema +get_sql_schema get_splits greatest grouping diff --git ql/src/test/results/clientpositive/udtf_get_sql_schema.q.out ql/src/test/results/clientpositive/udtf_get_sql_schema.q.out new file mode 100644 index 0000000000..2faf5aad57 --- /dev/null +++ ql/src/test/results/clientpositive/udtf_get_sql_schema.q.out @@ -0,0 +1,158 @@ +PREHOOK: query: describe function get_sql_schema +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: describe function get_sql_schema +POSTHOOK: type: DESCFUNCTION +get_sql_schema(string) - Takes query as argument. Returns schema (column names and types) of the resultset that would be generated when the query is executed. Can be invoked like: select get_sql_schema("select * from some_table").NOTE: This does not produce any output for DDL queries like show tables/databases/... and others. +PREHOOK: query: describe function extended get_sql_schema +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: describe function extended get_sql_schema +POSTHOOK: type: DESCFUNCTION +get_sql_schema(string) - Takes query as argument. Returns schema (column names and types) of the resultset that would be generated when the query is executed. Can be invoked like: select get_sql_schema("select * from some_table").NOTE: This does not produce any output for DDL queries like show tables/databases/... and others. +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDTFGetSQLSchema +Function type:BUILTIN +PREHOOK: query: create table t1(c1 int, c2 float, c3 double, c4 string, c5 date, c6 array, c7 struct, c8 map) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t1 +POSTHOOK: query: create table t1(c1 int, c2 float, c3 double, c4 string, c5 date, c6 array, c7 struct, c8 map) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t1 +PREHOOK: query: insert into t1 select 1, 1.1, 2.2, 'val1', '2019-02-15', array(1), named_struct('a',1,'b','2'), map(1,1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t1 +POSTHOOK: query: insert into t1 select 1, 1.1, 2.2, 'val1', '2019-02-15', array(1), named_struct('a',1,'b','2'), map(1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t1 +POSTHOOK: Lineage: t1.c1 SIMPLE [] +POSTHOOK: Lineage: t1.c2 EXPRESSION [] +POSTHOOK: Lineage: t1.c3 EXPRESSION [] +POSTHOOK: Lineage: t1.c4 SIMPLE [] +POSTHOOK: Lineage: t1.c5 EXPRESSION [] +POSTHOOK: Lineage: t1.c6 EXPRESSION [] +POSTHOOK: Lineage: t1.c7 EXPRESSION [] +POSTHOOK: Lineage: t1.c8 EXPRESSION [] +PREHOOK: query: explain select get_sql_schema('select * from t1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: explain select get_sql_schema('select * from t1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'select * from t1' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + function name: get_sql_schema + Select Operator + expressions: col_name (type: binary), col_type (type: binary) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select get_sql_schema('select * from t1') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select get_sql_schema('select * from t1') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +t1.c1 int +t1.c2 float +t1.c3 double +t1.c4 string +t1.c5 date +t1.c6 array +t1.c7 struct +t1.c8 map +PREHOOK: query: create external table t2(c1 int, c2 float, c3 double, c4 string, c5 date, c6 array, c7 struct, c8 map) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t2 +POSTHOOK: query: create external table t2(c1 int, c2 float, c3 double, c4 string, c5 date, c6 array, c7 struct, c8 map) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t2 +PREHOOK: query: insert into t2 select 1, 1.1, 2.2, 'val1', '2019-02-15', array(1), named_struct('a',1,'b','2'), map(1,1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@t2 +POSTHOOK: query: insert into t2 select 1, 1.1, 2.2, 'val1', '2019-02-15', array(1), named_struct('a',1,'b','2'), map(1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@t2 +POSTHOOK: Lineage: t2.c1 SIMPLE [] +POSTHOOK: Lineage: t2.c2 EXPRESSION [] +POSTHOOK: Lineage: t2.c3 EXPRESSION [] +POSTHOOK: Lineage: t2.c4 SIMPLE [] +POSTHOOK: Lineage: t2.c5 EXPRESSION [] +POSTHOOK: Lineage: t2.c6 EXPRESSION [] +POSTHOOK: Lineage: t2.c7 EXPRESSION [] +POSTHOOK: Lineage: t2.c8 EXPRESSION [] +PREHOOK: query: explain select get_sql_schema('select * from t2') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: explain select get_sql_schema('select * from t2') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'select * from t2' (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + UDTF Operator + Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + function name: get_sql_schema + Select Operator + expressions: col_name (type: binary), col_type (type: binary) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + ListSink + +PREHOOK: query: select get_sql_schema('select * from t2') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select get_sql_schema('select * from t2') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +t2.c1 int +t2.c2 float +t2.c3 double +t2.c4 string +t2.c5 date +t2.c6 array +t2.c7 struct +t2.c8 map