diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 7ceb005..b623187 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -302,7 +302,9 @@ KW_CONF: 'CONF';
 KW_VALUES: 'VALUES';
 KW_RELOAD: 'RELOAD';
 KW_YEAR: 'YEAR';
+KW_QUARTER: 'QUARTER';
 KW_MONTH: 'MONTH';
+KW_WEEK: 'WEEK';
 KW_DAY: 'DAY';
 KW_HOUR: 'HOUR';
 KW_MINUTE: 'MINUTE';
@@ -329,6 +331,8 @@ KW_RELY: 'RELY';
 KW_NORELY: 'NORELY';
 KW_KEY: 'KEY';
 KW_ABORT: 'ABORT';
+KW_EXTRACT: 'EXTRACT';
+KW_FLOOR: 'FLOOR';
 
 // Operators
 // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work.
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 9ba1865..d0b85c5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -221,6 +221,58 @@ castExpression
     RPAREN -> ^(TOK_FUNCTION primitiveType expression)
     ;
 
+// EXTRACT(<unit> FROM <expr>) is rewritten to a call of the function named by timeQualifiers,
+// e.g. extract(day from t) becomes day(t).
+extractExpression
+    :
+    KW_EXTRACT
+    LPAREN
+          (timeUnit=timeQualifiers)
+          KW_FROM
+          expression
+    RPAREN -> ^(TOK_FUNCTION $timeUnit expression)
+    ;
+
+// Maps a time-unit keyword to the identifier used as the function name (WEEK maps to weekofyear).
+timeQualifiers
+    :
+    KW_YEAR -> Identifier["year"]
+    | KW_QUARTER -> Identifier["quarter"]
+    | KW_MONTH -> Identifier["month"]
+    | KW_WEEK -> Identifier["weekofyear"]
+    | KW_DAY -> Identifier["day"]
+    | KW_HOUR -> Identifier["hour"]
+    | KW_MINUTE -> Identifier["minute"]
+    | KW_SECOND -> Identifier["second"]
+    ;
+
+// FLOOR(<expr> TO <unit>) is rewritten to a call of the matching floor_<unit> function.
+// The TO clause is optional so that the existing one-argument floor(<expr>) still parses now
+// that FLOOR lexes as KW_FLOOR instead of an Identifier; that form is rewritten to floor(<expr>).
+floorTimeExpression
+    :
+    KW_FLOOR
+    LPAREN
+          expression
+          (KW_TO (floorTimeUnit=floorTimeQualifiers))?
+    RPAREN
+    -> {floorTimeUnit != null}? ^(TOK_FUNCTION $floorTimeUnit expression)
+    -> ^(TOK_FUNCTION Identifier["floor"] expression)
+    ;
+
+// Maps a time-unit keyword to the identifier of the corresponding floor_<unit> function.
+floorTimeQualifiers
+    :
+    KW_YEAR -> Identifier["floor_year"]
+    | KW_QUARTER -> Identifier["floor_quarter"]
+    | KW_MONTH -> Identifier["floor_month"]
+    | KW_WEEK -> Identifier["floor_week"]
+    | KW_DAY -> Identifier["floor_day"]
+    | KW_HOUR -> Identifier["floor_hour"]
+    | KW_MINUTE -> Identifier["floor_minute"]
+    | KW_SECOND -> Identifier["floor_second"]
+    ;
+
 caseExpression
 @init { gParent.pushMsg("case expression", state); }
 @after { gParent.popMsg(state); }
@@ -323,6 +375,8 @@ atomExpression
     (KW_NULL) => KW_NULL -> TOK_NULL
     | (constant) => constant
     | castExpression
+    | extractExpression
+    | floorTimeExpression
     | caseExpression
     | whenExpression
     | (functionName LPAREN) => function
@@ -654,13 +708,13 @@ nonReserved
     | KW_KEYS | KW_KEY_TYPE | KW_LAST | KW_LIMIT | KW_OFFSET | KW_LINES | KW_LOAD | KW_LOCATION | KW_LOCK | KW_LOCKS | KW_LOGICAL | KW_LONG
     | KW_MAPJOIN | KW_MATERIALIZED | KW_METADATA | KW_MINUS | KW_MINUTE | KW_MONTH | KW_MSCK | KW_NOSCAN | KW_NO_DROP | KW_NULLS | KW_OFFLINE
     | KW_OPTION | KW_OUTPUTDRIVER | KW_OUTPUTFORMAT | KW_OVERWRITE | KW_OWNER | KW_PARTITIONED | KW_PARTITIONS | KW_PLUS | KW_PRETTY
-    | KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER
+    | KW_PRINCIPALS | KW_PROTECTION | KW_PURGE | KW_QUARTER | KW_READ | KW_READONLY | KW_REBUILD | KW_RECORDREADER | KW_RECORDWRITER
     | KW_RELOAD | KW_RENAME | KW_REPAIR | KW_REPLACE | KW_REPLICATION | KW_RESTRICT | KW_REWRITE
     | KW_ROLE | KW_ROLES | KW_SCHEMA | KW_SCHEMAS | KW_SECOND | KW_SEMI | KW_SERDE | KW_SERDEPROPERTIES | KW_SERVER | KW_SETS | KW_SHARED
     | KW_SHOW | KW_SHOW_DATABASE | KW_SKEWED | KW_SORT | KW_SORTED | KW_SSL | KW_STATISTICS | KW_STORED
     | KW_STREAMTABLE | KW_STRING | KW_STRUCT | KW_TABLES | KW_TBLPROPERTIES | KW_TEMPORARY | KW_TERMINATED
     | KW_TINYINT | KW_TOUCH | KW_TRANSACTIONS | KW_UNARCHIVE | KW_UNDO | KW_UNIONTYPE | KW_UNLOCK | KW_UNSET
-    | KW_UNSIGNED | KW_URI | KW_USE | KW_UTC | KW_UTCTIMESTAMP | KW_VALUE_TYPE | KW_VIEW | KW_WHILE | KW_YEAR
+    | KW_UNSIGNED | KW_URI | KW_USE | KW_UTC | KW_UTCTIMESTAMP | KW_VALUE_TYPE | KW_VIEW | KW_WEEK | KW_WHILE | KW_YEAR
     | KW_WORK
     | KW_TRANSACTION
     | KW_WRITE
diff --git
ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateFloor.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateFloor.java index 08ed9fd..126ed44 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateFloor.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFDateFloor.java @@ -40,7 +40,6 @@ public abstract class UDFDateFloor extends UDF { private final QueryGranularity granularity; - private final TimestampWritable result; public UDFDateFloor(String granularity) { @@ -52,7 +51,12 @@ public TimestampWritable evaluate(TimestampWritable t) { if (t == null) { return null; } - long newTimestamp = granularity.truncate(t.getTimestamp().getTime()); + final long originalTimestamp = t.getTimestamp().getTime(); // default + final long originalTimestampUTC = new DateTime(originalTimestamp) + .withZoneRetainFields(DateTimeZone.UTC).getMillis(); // default -> utc + final long newTimestampUTC = granularity.truncate(originalTimestampUTC); // utc + final long newTimestamp = new DateTime(newTimestampUTC, DateTimeZone.UTC) + .withZoneRetainFields(DateTimeZone.getDefault()).getMillis(); // utc -> default result.setTime(newTimestamp); return result; } diff --git ql/src/test/queries/clientpositive/extract.q ql/src/test/queries/clientpositive/extract.q new file mode 100644 index 0000000..c09574b --- /dev/null +++ ql/src/test/queries/clientpositive/extract.q @@ -0,0 +1,43 @@ +drop table extract_udf; + +create table extract_udf (t timestamp); +from (select * from src tablesample (1 rows)) s + insert overwrite table extract_udf + select '2011-05-06 07:08:09.1234567'; + +explain +select day(t) +from extract_udf; + +select day(t) +from extract_udf; + +-- new syntax +explain +select extract(day from t) +from extract_udf; + +select extract(day from t) +from extract_udf; + + +select extract(second from t) +from extract_udf; + +select extract(minute from t) +from extract_udf; + +select extract(hour from t) +from extract_udf; + +select extract(week from t) +from extract_udf; + +select 
extract(month from t) +from extract_udf; + +select extract(quarter from t) +from extract_udf; + +select extract(year from t) +from extract_udf; diff --git ql/src/test/queries/clientpositive/floor_time.q ql/src/test/queries/clientpositive/floor_time.q new file mode 100644 index 0000000..b0f6f90 --- /dev/null +++ ql/src/test/queries/clientpositive/floor_time.q @@ -0,0 +1,46 @@ +drop table extract_udf; + +create table extract_udf (t timestamp); +from (select * from src tablesample (1 rows)) s + insert overwrite table extract_udf + select '2011-05-06 07:08:09.1234567'; + +select t +from extract_udf; + +explain +select floor_day(t) +from extract_udf; + +select floor_day(t) +from extract_udf; + +-- new syntax +explain +select floor(t to day) +from extract_udf; + +select floor(t to day) +from extract_udf; + + +select floor(t to second) +from extract_udf; + +select floor(t to minute) +from extract_udf; + +select floor(t to hour) +from extract_udf; + +select floor(t to week) +from extract_udf; + +select floor(t to month) +from extract_udf; + +select floor(t to quarter) +from extract_udf; + +select floor(t to year) +from extract_udf; diff --git ql/src/test/results/clientpositive/extract.q.out ql/src/test/results/clientpositive/extract.q.out new file mode 100644 index 0000000..73c9bc6 --- /dev/null +++ ql/src/test/results/clientpositive/extract.q.out @@ -0,0 +1,200 @@ +PREHOOK: query: drop table extract_udf +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table extract_udf +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table extract_udf (t timestamp) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@extract_udf +POSTHOOK: query: create table extract_udf (t timestamp) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@extract_udf +PREHOOK: query: from (select * from src tablesample (1 rows)) s + insert overwrite table extract_udf + select '2011-05-06 07:08:09.1234567' +PREHOOK: type: QUERY 
+PREHOOK: Input: default@src +PREHOOK: Output: default@extract_udf +POSTHOOK: query: from (select * from src tablesample (1 rows)) s + insert overwrite table extract_udf + select '2011-05-06 07:08:09.1234567' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@extract_udf +POSTHOOK: Lineage: extract_udf.t EXPRESSION [] +PREHOOK: query: explain +select day(t) +from extract_udf +PREHOOK: type: QUERY +POSTHOOK: query: explain +select day(t) +from extract_udf +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: extract_udf + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: day(t) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select day(t) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select day(t) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +6 +PREHOOK: query: -- new syntax +explain +select extract(day from t) +from extract_udf +PREHOOK: type: QUERY +POSTHOOK: query: -- new syntax +explain +select extract(day from t) +from extract_udf +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map 
Operator Tree: + TableScan + alias: extract_udf + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: day(t) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select extract(day from t) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select extract(day from t) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +6 +PREHOOK: query: select extract(second from t) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select extract(second from t) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +9 +PREHOOK: query: select extract(minute from t) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select extract(minute from t) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +8 +PREHOOK: query: select extract(hour from t) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select extract(hour from t) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### 
+7 +PREHOOK: query: select extract(week from t) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select extract(week from t) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +18 +PREHOOK: query: select extract(month from t) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select extract(month from t) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +5 +PREHOOK: query: select extract(quarter from t) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select extract(quarter from t) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +2 +PREHOOK: query: select extract(year from t) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select extract(year from t) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +2011 diff --git ql/src/test/results/clientpositive/floor_time.q.out ql/src/test/results/clientpositive/floor_time.q.out new file mode 100644 index 0000000..f0cb324 --- /dev/null +++ ql/src/test/results/clientpositive/floor_time.q.out @@ -0,0 +1,211 @@ +PREHOOK: query: drop table extract_udf +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table extract_udf +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table extract_udf (t timestamp) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@extract_udf +POSTHOOK: query: create table extract_udf (t timestamp) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@extract_udf +PREHOOK: query: from (select * from src tablesample (1 rows)) s + insert overwrite table extract_udf + select '2011-05-06 07:08:09.1234567' +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@extract_udf +POSTHOOK: query: from (select * from src tablesample (1 rows)) s + insert overwrite table extract_udf + select '2011-05-06 07:08:09.1234567' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@extract_udf +POSTHOOK: Lineage: extract_udf.t EXPRESSION [] +PREHOOK: query: select t +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select t +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +2011-05-06 07:08:09.1234567 +PREHOOK: query: explain +select floor_day(t) +from extract_udf +PREHOOK: type: QUERY +POSTHOOK: query: explain +select floor_day(t) +from extract_udf +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: extract_udf + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: floor_day(t) (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select floor_day(t) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked 
pattern was here #### +POSTHOOK: query: select floor_day(t) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +2011-05-06 00:00:00 +PREHOOK: query: -- new syntax +explain +select floor(t to day) +from extract_udf +PREHOOK: type: QUERY +POSTHOOK: query: -- new syntax +explain +select floor(t to day) +from extract_udf +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: extract_udf + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: floor_day(t) (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 27 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select floor(t to day) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select floor(t to day) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +2011-05-06 00:00:00 +PREHOOK: query: select floor(t to second) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select floor(t to second) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +2011-05-06 07:08:09 +PREHOOK: query: select floor(t to minute) +from extract_udf +PREHOOK: 
type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select floor(t to minute) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +2011-05-06 07:08:00 +PREHOOK: query: select floor(t to hour) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select floor(t to hour) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +2011-05-06 07:00:00 +PREHOOK: query: select floor(t to week) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select floor(t to week) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +2011-05-02 00:00:00 +PREHOOK: query: select floor(t to month) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select floor(t to month) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +2011-05-01 00:00:00 +PREHOOK: query: select floor(t to quarter) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select floor(t to quarter) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +2011-04-01 00:00:00 +PREHOOK: query: select floor(t to year) +from extract_udf +PREHOOK: type: QUERY +PREHOOK: Input: default@extract_udf +#### A masked pattern was here #### +POSTHOOK: query: select floor(t to year) +from extract_udf +POSTHOOK: type: QUERY +POSTHOOK: Input: default@extract_udf +#### A masked pattern was here #### +2011-01-01 00:00:00