diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index fc0256c..fd5b222 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -58,6 +58,7 @@ import org.apache.hadoop.hive.ql.udf.UDFDateAdd; import org.apache.hadoop.hive.ql.udf.UDFDateDiff; import org.apache.hadoop.hive.ql.udf.UDFDateSub; +import org.apache.hadoop.hive.ql.udf.UDFDayName; import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth; import org.apache.hadoop.hive.ql.udf.UDFDegrees; import org.apache.hadoop.hive.ql.udf.UDFE; @@ -317,7 +318,8 @@ registerUDF("positive", UDFOPPositive.class, true, "+"); registerUDF("negative", UDFOPNegative.class, true, "-"); - + + registerUDF("dayname", UDFDayName.class, false); registerUDF("day", UDFDayOfMonth.class, false); registerUDF("dayofmonth", UDFDayOfMonth.class, false); registerUDF("month", UDFMonth.class, false); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayName.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayName.java new file mode 100644 index 0000000..0d0010b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayName.java @@ -0,0 +1,149 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* http://www.apache.org/licenses/LICENSE-2.0 +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.hadoop.hive.ql.udf; + +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.ParsePosition; +import java.text.SimpleDateFormat; +import java.util.Date; +import org.apache.hadoop.io.Text; + + + + +public final class UDFDayName extends UDF { + + private Text dayName = new Text(); + + public UDFDayName() { + + } + + /** + * Checks whether the input date is a valid date + * + * @param dateString + * the dateString in the format of "yyyy-MM-dd" or "yyyy/MM/dd". + * @param simpleDateFormat + * SimpleDateFormat in the form "yyyy-MM-dd" or "yyyy/MM/dd". + * @return True if the dateString is a valid date string. false if the + * dateString is not a valid date string. + * + */ + boolean isLegalDate(String dateString, SimpleDateFormat simpleDateFormat) { + + simpleDateFormat.setLenient(false); // Initialising setLenient as false,since by default it is true + + return simpleDateFormat.parse(dateString,new ParsePosition(0)) != null;//returns true the input date is of correct + // format + } + + /** + * Get the day name of a date from a date string. + * + * @param dateString + * the dateString in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy/MM/dd HH:mm:ss" "yyyy-MM-dd" or yyyy/MM/dd + * @return A Text from the set(Sunday, Monday, Tuesday, Wednesday, Thursday, + * Friday, Saturday). null if the dateString is not a valid date + * string. + * @throws java.text.ParseException + */ + public Text evaluate(Text dateString) throws java.text.ParseException { + + //Date format can be in any of the following format-(yyyy-MM-dd),(yyyy/MM/dd) + SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy/MM/dd"); + + if (dateString == null) { // input text must not be null if input is null a null would be returned + return null; + } + if (dateString.toString().length() < 10) { + return null; + } + + try { + Date date = null; + DateFormat dateFormat = new SimpleDateFormat("EEEE"); + + //extracting the date part from the input string + String dateFormatCheck = dateString.toString().substring(0, 10); + + // extracting the 4 the character from the input + String indexOfSlash = dateString.toString().substring(4, 5); + + // if the date format is of yyyy/MM/dd it is stored in the text variable dayName. + if ((indexOfSlash.equals("/")) && (isLegalDate(dateFormatCheck, dateFormatter))) { + + date = dateFormatter.parse(dateFormatCheck); + dayName.set(dateFormat.format(date).toLowerCase()); + + } else if (isLegalDate(dateFormatCheck, formatter)) { + date = formatter.parse(dateFormatCheck); + dayName.set(dateFormat.format(date).toLowerCase()); + + } else // if the input date is not valid it returns null + return null; + + return dayName; + + } catch (ParseException e) { + + return null; + } + } + + /** + * Get the day name of a date from a date string. + * + * @param dateString + * the dateString in the format of "yyyy-MM-dd HH:mm:ss" + * + * @return A Text from the set(Sunday, Monday, Tuesday, Wednesday, Thursday, + * Friday, Saturday). null if the dateString is not a valid date + * string. + * @throws java.text.ParseException + */ + public Text evaluate(TimestampWritable dateTimeStamp) throws java.text.ParseException { + + if (dateTimeStamp == null) { // it returns null if the input is null + return null; + } + + Date date = dateTimeStamp.getTimestamp(); // converting the timestamp to date + SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + + try { + if (isLegalDate(date.toString(), formatter)) { // checking the input date is legal or not + DateFormat dateFormat = new SimpleDateFormat("EEEE"); + + // if the date format is of yyyy-MM-dd it is stored in the text variable dayName + date = formatter.parse(date.toString()); + dayName.set(dateFormat.format(date).toLowerCase()); + return dayName; + + } else // if the input date is not valid it returns null + return null; + + } catch (ParseException e) { + return null; + } + + } +} diff --git ql/src/test/queries/clientpositive/udf_dayname.q ql/src/test/queries/clientpositive/udf_dayname.q new file mode 100644 index 0000000..9ff9717 --- /dev/null +++ ql/src/test/queries/clientpositive/udf_dayname.q @@ -0,0 +1,44 @@ +DESCRIBE FUNCTION dayname; +DESCRIBE FUNCTION EXTENDED dayname; + +EXPLAIN SELECT + dayname('2013-02-06') +FROM src LIMIT 1; + +SELECT '2009-03-20', +dayname("2009-03-20") FROM src LIMIT 1; + + +SELECT + '2009/03/20', + dayname("2009/03/20") FROM src LIMIT 1; + + +SELECT + '2009/03/20 11:30:01', + dayname("2009/03/20 11:30:01") +FROM src LIMIT 1; + +SELECT + '2009-03-20 11:30:01', + dayname("2009-03-20 11:30:01") +FROM src LIMIT 1; + +SELECT + 'random_string', + dayname('random_string') +FROM src LIMIT 1; + +SELECT + '2009 Mar 20 11:30:01 am', + dayname("2009 Mar 20 11:30:01 am") +FROM src LIMIT 1; + + + + + + + + + diff --git ql/src/test/results/clientpositive/udf_dayname.q.out ql/src/test/results/clientpositive/udf_dayname.q.out new file mode 100644 index 0000000..e5db1c0 --- /dev/null +++ ql/src/test/results/clientpositive/udf_dayname.q.out @@ -0,0 +1,134 @@ +PREHOOK: query: DESCRIBE FUNCTION dayname +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION dayname +POSTHOOK: type: DESCFUNCTION +There is no documentation for function 'dayname' +PREHOOK: query: DESCRIBE FUNCTION EXTENDED dayname +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED dayname +POSTHOOK: type: DESCFUNCTION +There is no documentation for function 'dayname' +PREHOOK: query: EXPLAIN SELECT + dayname('2013-02-06') +FROM src LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + dayname('2013-02-06') +FROM src LIMIT 1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION dayname '2013-02-06'))) (TOK_LIMIT 1))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: dayname('2013-02-06') + type: string + outputColumnNames: _col0 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 1 + + +PREHOOK: query: SELECT '2009-03-20', +dayname("2009-03-20") FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT '2009-03-20', +dayname("2009-03-20") FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009-03-20 friday +PREHOOK: query: SELECT + '2009/03/20', + dayname("2009/03/20") FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '2009/03/20', + dayname("2009/03/20") FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009/03/20 friday +PREHOOK: query: SELECT + '2009/03/20 11:30:01', + dayname("2009/03/20 11:30:01") +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '2009/03/20 11:30:01', + dayname("2009/03/20 11:30:01") +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009/03/20 11:30:01 friday +PREHOOK: query: SELECT + '2009-03-20 11:30:01', + dayname("2009-03-20 11:30:01") +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '2009-03-20 11:30:01', + dayname("2009-03-20 11:30:01") +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009-03-20 11:30:01 friday +PREHOOK: query: SELECT + 'random_string', + dayname('random_string') +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + 'random_string', + dayname('random_string') +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +random_string NULL +PREHOOK: query: SELECT + '2009 Mar 20 11:30:01 am', + dayname("2009 Mar 20 11:30:01 am") +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '2009 Mar 20 11:30:01 am', + dayname("2009 Mar 20 11:30:01 am") +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009 Mar 20 11:30:01 am NULL