diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 425900d..19e76ce 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -126,6 +126,7 @@ import org.apache.hadoop.hive.ql.udf.UDFUnixTimeStamp; import org.apache.hadoop.hive.ql.udf.UDFUpper; import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; import org.apache.hadoop.hive.ql.udf.UDFYear; +import org.apache.hadoop.hive.ql.udf.UDFDayOfYear; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFBridge; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFCollectSet; @@ -264,6 +265,7 @@ public final class FunctionRegistry { registerUDF("rand", UDFRand.class, false); registerUDF("abs", UDFAbs.class, false); registerUDF("pmod", UDFPosMod.class, false); + registerUDF("dayofyear", UDFDayOfYear.class, false); registerUDF("ln", UDFLn.class, false); registerUDF("log2", UDFLog2.class, false); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfYear.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfYear.java new file mode 100644 index 0000000..a0ab4e4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDayOfYear.java @@ -0,0 +1,68 @@ +package org.apache.hadoop.hive.ql.udf; + + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; + +@Description(name = "dayofyear", value = "_FUNC_(date or timestamp) - Returns the numeric day of the year corresponding to date or timestamp") + public class UDFDayOfYear extends UDF { + private final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private final Calendar calendar = Calendar.getInstance(); + + private IntWritable result = new IntWritable(); + + public UDFDayOfYear() { + } + /** + * Get the numeric day of year from a date string. + * + * @param dateString + * the dateString in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy-MM-dd". + * @return an IntWritable from 1 to 366. null if the dateString is not a valid date + * string. + */ + public IntWritable evaluate(Text dateString) { + + if (dateString == null) { + return null; + } + + try { + Date date = formatter.parse(dateString.toString()); + calendar.setTime(date); + result.set(calendar.get(Calendar.DAY_OF_YEAR)); + return result; + } catch (ParseException e) { + return null; + } + } + /** + * Get the numeric day of the year from a Timestamp. + * + * @param TimestampWritable + * the dateTimeStamp in the format of "yyyy-MM-dd HH:mm:ss" or + * "yyyy-MM-dd". + * @return an IntWritable from 1 to 365 or 366. null if the Timestamp is not a valid date + * Timestamp. + */ + public IntWritable evaluate(TimestampWritable dateTimeStamp) { + if (dateTimeStamp == null) { + return null; + } + + calendar.setTime(dateTimeStamp.getTimestamp()); + result.set(calendar.get(Calendar.DAY_OF_YEAR)); + return result; + } + + } + diff --git ql/src/test/queries/clientpositive/udf_dayofyear.q ql/src/test/queries/clientpositive/udf_dayofyear.q new file mode 100644 index 0000000..9ec0850 --- /dev/null +++ ql/src/test/queries/clientpositive/udf_dayofyear.q @@ -0,0 +1,32 @@ +DESCRIBE FUNCTION dayofyear; +DESCRIBE FUNCTION EXTENDED dayofyear; +EXPLAIN SELECT '2010-10-12', + dayofyear("2010-10-12") FROM src LIMIT 1; +SELECT '2009-03-20', + dayofyear("2009-03-20") FROM src LIMIT 1; + +SELECT + '2009/03/20', + dayofyear("2009/03/20") FROM src LIMIT 1; + + +SELECT + '2009/03/20 11:30:01', + dayofyear("2009/03/20 11:30:01") +FROM src LIMIT 1; + +SELECT + '2009-03-20 11:30:01', + dayofyear("2009-03-20 11:30:01") +FROM src LIMIT 1; + +SELECT + 'random_string', + dayofyear('random_string') +FROM src LIMIT 1; + +SELECT + '2009 Mar 20 11:30:01 am', + dayofyear("2009 Mar 20 11:30:01 am") +FROM src LIMIT 1; + diff --git ql/src/test/results/clientpositive/udf_dayofyear.q.out ql/src/test/results/clientpositive/udf_dayofyear.q.out new file mode 100644 index 0000000..2f5463e --- /dev/null +++ ql/src/test/results/clientpositive/udf_dayofyear.q.out @@ -0,0 +1,134 @@ +PREHOOK: query: DESCRIBE FUNCTION dayofyear +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION dayofyear +POSTHOOK: type: DESCFUNCTION +dayofyear(date or timestamp) - Returns the numeric day of the year corresponding to date or timestamp +PREHOOK: query: DESCRIBE FUNCTION EXTENDED dayofyear +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED dayofyear +POSTHOOK: type: DESCFUNCTION +dayofyear(date or timestamp) - Returns the numeric day of the year corresponding to date or timestamp +PREHOOK: query: EXPLAIN SELECT '2010-10-12', + dayofyear("2010-10-12") FROM src LIMIT 1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT '2010-10-12', + dayofyear("2010-10-12") FROM src LIMIT 1 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR '2010-10-12') (TOK_SELEXPR (TOK_FUNCTION dayofyear "2010-10-12"))) (TOK_LIMIT 1))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: '2010-10-12' + type: string + expr: dayofyear('2010-10-12') + type: int + outputColumnNames: _col0, _col1 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 1 + + +PREHOOK: query: SELECT '2009-03-20', + dayofyear("2009-03-20") FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT '2009-03-20', + dayofyear("2009-03-20") FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009-03-20 79 +PREHOOK: query: SELECT + '2009/03/20', + dayofyear("2009/03/20") FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '2009/03/20', + dayofyear("2009/03/20") FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009/03/20 NULL +PREHOOK: query: SELECT + '2009/03/20 11:30:01', + dayofyear("2009/03/20 11:30:01") +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '2009/03/20 11:30:01', + dayofyear("2009/03/20 11:30:01") +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009/03/20 11:30:01 NULL +PREHOOK: query: SELECT + '2009-03-20 11:30:01', + dayofyear("2009-03-20 11:30:01") +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '2009-03-20 11:30:01', + dayofyear("2009-03-20 11:30:01") +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009-03-20 11:30:01 79 +PREHOOK: query: SELECT + 'random_string', + dayofyear('random_string') +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + 'random_string', + dayofyear('random_string') +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +random_string NULL +PREHOOK: query: SELECT + '2009 Mar 20 11:30:01 am', + dayofyear("2009 Mar 20 11:30:01 am") +FROM src LIMIT 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT + '2009 Mar 20 11:30:01 am', + dayofyear("2009 Mar 20 11:30:01 am") +FROM src LIMIT 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2009 Mar 20 11:30:01 am NULL