diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 425900d..027819c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -126,6 +126,7 @@ import org.apache.hadoop.hive.ql.udf.UDFUnixTimeStamp; import org.apache.hadoop.hive.ql.udf.UDFUpper; import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; import org.apache.hadoop.hive.ql.udf.UDFYear; +import org.apache.hadoop.hive.ql.udf.UDFDayName; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectSet; @@ -265,7 +266,8 @@ public final class FunctionRegistry { registerUDF("abs", UDFAbs.class, false); registerUDF("pmod", UDFPosMod.class, false); - registerUDF("ln", UDFLn.class, false); + registerUDF("ln", UDFLn.class, false); registerUDF("rpad", UDFRpad.class, false); + registerUDF("log2", UDFLog2.class, false); registerUDF("sin", UDFSin.class, false); registerUDF("asin", UDFAsin.class, false); @@ -295,6 +297,7 @@ public final class FunctionRegistry { registerUDF("lcase", UDFLower.class, false); registerUDF("trim", UDFTrim.class, false); registerUDF("ltrim", UDFLTrim.class, false); + registerUDF("dayname", UDFDayName.class, false); registerUDF("rtrim", UDFRTrim.class, false); registerUDF("length", UDFLength.class, false); registerUDF("reverse", UDFReverse.class, false); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayName.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayName.java new file mode 100644 index 0000000..8d29e20 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayName.java @@ -0,0 +1,229 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import java.text.DateFormat; +import java.text.ParseException; +import java.text.ParsePosition; +import java.text.SimpleDateFormat; +import java.util.Date; +import org.apache.hadoop.io.Text; + +/** + * UDFDayName. + * + */ + +@Description(name = "dayname", + value = "_FUNC_(date) - Returns the day name of the date", + extended = "date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or " + + "'yyyy-MM-dd' or 'yyyy/MM/dd'.\n" + "Example:\n " + + " > SELECT _FUNC_('2012-30-07') FROM src LIMIT 1;\n" + " Monday ") +public final class UDFDayName extends UDF { + + private Text dayName = new Text(); + + /* default constructor */ + public UDFDayName() { + + } + + /** + * Checks whether the input date is a valid date + * + * @param dateString + * the dateString in the format of "yyyy-MM-dd" or "yyyy/MM/dd". + * @param simpleDateFormat + * SimpleDateFormat in the form "yyyy-MM-dd" or "yyyy/MM/dd". + * @return True if the dateString is a valid date string. false if the + * dateString is not a valid date string. + * + */ + + boolean isLegalDate(String dateString, SimpleDateFormat simpleDateFormat) { + + simpleDateFormat.setLenient(false); // Initialising setLenient as + // false,since by default it is true + + return simpleDateFormat.parse(dateString, new ParsePosition(0)) != null;// returns + // true + // if + // the + // input + // date + // is + // of + // correct + // format + + } + + /** + * Get the day name of a date from a date string. + * + * @param dateString + * the dateString in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy/MM/dd HH:mm:ss" "yyyy-MM-dd" or yyyy/MM/dd + * @return A Text from the set(Sunday, Monday, Tuesday, Wednesday, Thursday, + * Friday, Saturday). null if the dateString is not a valid date + * string. + * @throws java.text.ParseException + */ + + public Text evaluate(Text dateString) throws java.text.ParseException { + + SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");// Date + // format + // can + // be in + // any + // of + // the + // following + // format-(yyyy-MM-dd),(yyyy/MM/dd) + SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy/MM/dd"); + + if (dateString == null) { // input text must not be null + // if input is null a null would be returned + + return null; + } + + if (dateString.toString().length() < 10) { // Input date must be of + // length greater than or + // equal to 10(since + // format-(yyyy-MM-dd) or + // (yyyy/MM/dd)is + // of 10 character length) + // if not it + // will return null + return null; + } + + try { + Date date = null; + DateFormat dateFormat = new SimpleDateFormat("EEEE"); + String dateFormatCheck = dateString.toString().substring(0, 10); // extracting + // the + // date + // part + // from + // the + // input + // string + String indexOfSlash = dateString.toString().substring(4, 5); // extracting + // the + // 4 + // th + // character + // from + // the + // input + // string + if ((indexOfSlash.equals("/")) && (isLegalDate(dateFormatCheck, dateFormatter))) { // if + // the + // date + // format + // is + // of + // yyyy/MM/dd + // it + // is + // stored + // in + // the + // text + // variable + // dayName. + date = dateFormatter.parse(dateFormatCheck); + dayName.set(dateFormat.format(date).toLowerCase()); + } else if (isLegalDate(dateFormatCheck, formatter)) { // if the date + // format is + // of + // yyyy-MM-dd + // it is + // stored in + // the text + // variable + // dayName. + date = formatter.parse(dateFormatCheck); + dayName.set(dateFormat.format(date).toLowerCase()); + } else + // if the input date is not valid it returns null + + return null; + return dayName; + + } catch (ParseException e) { + + return null; + } + + } + + /** + * Get the day name of a date from a date string. + * + * @param dateString + * the dateString in the format of "yyyy-MM-dd HH:mm:ss" + * + * @return A Text from the set(Sunday, Monday, Tuesday, Wednesday, Thursday, + * Friday, Saturday). null if the dateString is not a valid date + * string. + * @throws java.text.ParseException + */ + + public Text evaluate(TimestampWritable dateTimeStamp) throws java.text.ParseException { + + if (dateTimeStamp == null) { // it returns null if the input is null + return null; + } + + Date date = dateTimeStamp.getTimestamp(); // converting the timestamp to + // date + + SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + + try { + if (isLegalDate(date.toString(), formatter)) { // checking the input + // date is legal or + // not + + DateFormat dateFormat = new SimpleDateFormat("EEEE"); + date = formatter.parse(date.toString()); // if the date format + // is of yyyy-MM-dd + // it is stored in + // the text variable + // dayName. + dayName.set(dateFormat.format(date).toLowerCase()); + return dayName; + } else + // if the input date is not valid it returns null + + return null; + } catch (ParseException e) { + return null; + } + + } + +} diff --git ql/src/test/queries/clientpositive/udf_dayname.q ql/src/test/queries/clientpositive/udf_dayname.q new file mode 100644 index 0000000..eee72b1 --- /dev/null +++ ql/src/test/queries/clientpositive/udf_dayname.q @@ -0,0 +1,39 @@ +DESCRIBE FUNCTION dayname; +DESCRIBE FUNCTION EXTENDED dayname; +EXPLAIN SELECT '2009-03-20', + dayname("2009-03-20") FROM src LIMIT 1; +SELECT '2009-03-20', + dayname("2009-03-20") FROM src LIMIT 1; + + +SELECT + '2009/03/20', + dayname("2009/03/20") FROM src LIMIT 1; + + +SELECT + '2009/03/20 11:30:01', + dayname("2009/03/20 11:30:01") +FROM src LIMIT 1; + +SELECT + '2009-03-20 11:30:01', + dayname("2009-03-20 11:30:01") +FROM src LIMIT 1; + +SELECT + 'random_string', + dayname('random_string') +FROM src LIMIT 1; + +SELECT + '2009 Mar 20 11:30:01 am', + dayname("2009 Mar 20 11:30:01 am") +FROM src LIMIT 1; + + + + + + + diff --git ql/src/test/results/clientpositive/udf_dayname.q.out ql/src/test/results/clientpositive/udf_dayname.q.out new file mode 100644 index 0000000..f1d0446 --- /dev/null +++ ql/src/test/results/clientpositive/udf_dayname.q.out @@ -0,0 +1,138 @@ +PREHOOK: query: DESCRIBE FUNCTION dayname +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION dayname +POSTHOOK: type: DESCFUNCTION +dayname(date) - Returns the day name of the date +PREHOOK: query: DESCRIBE FUNCTION EXTENDED dayname +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED dayname +POSTHOOK: type: DESCFUNCTION +dayname(date) - Returns the day name of the date +date is a string in the format of 'yyyy-MM-dd HH:mm:ss' or 'yyyy-MM-dd' or 'yyyy/MM/dd'. +Example: + > SELECT dayname('2012-30-07') FROM src LIMIT 1; + Monday +PREHOOK: query: EXPLAIN SELECT '2009-03-20', + dayname("2009-03-20") FROM src LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT '2009-03-20', + dayname("2009-03-20") FROM src LIMIT 1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR '2009-03-20') (TOK_SELEXPR (TOK_FUNCTION dayname "2009-03-20"))) (TOK_LIMIT 1))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: '2009-03-20' + type: string + expr: dayname('2009-03-20') + type: string + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 1 + + +PREHOOK: query: SELECT '2009-03-20', + dayname("2009-03-20") FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT '2009-03-20', + dayname("2009-03-20") FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009-03-20 friday +PREHOOK: query: SELECT + '2009/03/20', + dayname("2009/03/20") FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '2009/03/20', + dayname("2009/03/20") FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009/03/20 friday +PREHOOK: query: SELECT + '2009/03/20 11:30:01', + dayname("2009/03/20 11:30:01") +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '2009/03/20 11:30:01', + dayname("2009/03/20 11:30:01") +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009/03/20 11:30:01 friday +PREHOOK: query: SELECT + '2009-03-20 11:30:01', + dayname("2009-03-20 11:30:01") +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '2009-03-20 11:30:01', + dayname("2009-03-20 11:30:01") +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009-03-20 11:30:01 friday +PREHOOK: query: SELECT + 'random_string', + dayname('random_string') +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + 'random_string', + dayname('random_string') +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +random_string NULL +PREHOOK: query: SELECT + '2009 Mar 20 11:30:01 am', + dayname("2009 Mar 20 11:30:01 am") +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '2009 Mar 20 11:30:01 am', + dayname("2009 Mar 20 11:30:01 am") +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009 Mar 20 11:30:01 am NULL