diff --git a/data/files/emp2.txt b/data/files/emp2.txt new file mode 100644 index 0000000..650aff7 --- /dev/null +++ b/data/files/emp2.txt @@ -0,0 +1,16 @@ +7369|SMITH|CLERK|7902|1980-12-17|1980-12-17 00:00:00|800|NULL|20 +7499|ALLEN|SALESMAN|7698|1981-02-20|1981-02-20 00:00:00|1600|300|30 +7521|WARD|SALESMAN|7698|1981-02-22|1981-02-22 00:00:00|1250|500|30 +7566|JONES|MANAGER|7839|1981-04-02|1981-04-02 00:00:00|2975|NULL|20 +7654|MARTIN|SALESMAN|7698|1981-09-28|1981-09-28 00:00:00|1250|1400|30 +7698|BLAKE|MANAGER|7839|1981-05-01|1981-05-01 00:00:00|2850|NULL|30 +7782|CLARK|MANAGER|7839|1981-06-09|1981-06-09 00:00:00|2450|NULL|10 +7788|SCOTT|ANALYST|7566|1982-12-09|1982-12-09 00:00:00|3000|NULL|20 +7839|KING|PRESIDENT|NULL|1981-11-17|1981-11-17 00:00:00|5000|NULL|10 +7844|TURNER|SALESMAN|7698|1981-09-08|1981-09-08 00:00:00|1500|0|30 +7876|ADAMS|CLERK|7788|1983-01-12|1983-01-12 00:00:00|1100|NULL|20 +7900|JAMES|CLERK|7698|1981-12-03|1981-12-03 00:00:00|950|NULL|30 +7902|FORD|ANALYST|7566|1981-12-03|1981-12-03 00:00:00|3000|NULL|20 +7934|MILLER|CLERK|7782|1982-01-23|1982-01-23 00:00:00|1300|NULL|10 +7988|KATY|ANALYST|7566|NULL|NULL|1500|NULL|10 +7987|JULIA|ANALYST|7566|NULL|NULL|1500|NULL|10 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java index c1b8a1d..e0cd398 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFTranslator.java @@ -610,6 +610,7 @@ private static void validateValueBoundaryExprType(ObjectInspector OI) case SHORT: case DECIMAL: case TIMESTAMP: + case DATE: case STRING: break; default: diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java index 40fd6a4..4635ba2 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java @@ -20,6 +20,7 @@ import java.util.AbstractList; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -1141,6 +1142,8 @@ public static ValueBoundaryScanner getScanner(ValueBoundaryDef vbDef, Order orde return new DoubleValueBoundaryScanner(vbDef, order, vbDef.getExpressionDef()); case DECIMAL: return new HiveDecimalValueBoundaryScanner(vbDef, order, vbDef.getExpressionDef()); + case DATE: + return new DateValueBoundaryScanner(vbDef, order, vbDef.getExpressionDef()); case STRING: return new StringValueBoundaryScanner(vbDef, order, vbDef.getExpressionDef()); } @@ -1231,6 +1234,34 @@ public boolean isEqual(Object v1, Object v2) { } } + public static class DateValueBoundaryScanner extends ValueBoundaryScanner { + public DateValueBoundaryScanner(BoundaryDef bndDef, Order order, + PTFExpressionDef expressionDef) { + super(bndDef,order,expressionDef); + } + + @Override + public boolean isGreater(Object v1, Object v2, int amt) { + Date l1 = PrimitiveObjectInspectorUtils.getDate(v1, + (PrimitiveObjectInspector) expressionDef.getOI()); + Date l2 = PrimitiveObjectInspectorUtils.getDate(v2, + (PrimitiveObjectInspector) expressionDef.getOI()); + if (l1 != null && l2 != null) { + return (double)(l1.getTime() - l2.getTime())/1000 > (long)amt * 24 * 3600; // Converts amt days to milliseconds + } + return l1 != l2; // True if only one date is null + } + + @Override + public boolean isEqual(Object v1, Object v2) { + Date l1 = PrimitiveObjectInspectorUtils.getDate(v1, + (PrimitiveObjectInspector) expressionDef.getOI()); + Date l2 = PrimitiveObjectInspectorUtils.getDate(v2, + (PrimitiveObjectInspector) expressionDef.getOI()); + return (l1 == null && l2 == null) || (l1 != null && l1.equals(l2)); + } + } + public static class StringValueBoundaryScanner extends ValueBoundaryScanner { public StringValueBoundaryScanner(BoundaryDef bndDef, Order order, PTFExpressionDef expressionDef) { diff --git a/ql/src/test/queries/clientpositive/windowing_windowspec3.q b/ql/src/test/queries/clientpositive/windowing_windowspec3.q new file mode 100644 index 0000000..d00e939 --- /dev/null +++ b/ql/src/test/queries/clientpositive/windowing_windowspec3.q @@ -0,0 +1,29 @@ +-- Test value based windowing spec + +drop table if exists emp; + +create table emp(empno smallint, + ename varchar(10), + job varchar(10), + manager smallint, + hiredate date, + hirets timestamp, + salary double, + bonus double, + deptno tinyint) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/emp2.txt' into table emp; + +-- Support date datatype +select deptno, empno, hiredate, salary, + sum(salary) over (partition by deptno order by hiredate range 90 preceding), + sum(salary) over (partition by deptno order by hiredate range between 90 preceding and 90 following), + sum(salary) over (partition by deptno order by hiredate range between 90 preceding and 10 preceding), + sum(salary) over (partition by deptno order by hiredate range between 10 following and 90 following), + sum(salary) over (partition by deptno order by hiredate range between 10 following and unbounded following), + sum(salary) over (partition by deptno order by hiredate range between unbounded preceding and 10 following) +from emp; + + diff --git a/ql/src/test/results/clientpositive/windowing_windowspec3.q.out b/ql/src/test/results/clientpositive/windowing_windowspec3.q.out new file mode 100644 index 0000000..a5eae5b --- /dev/null +++ b/ql/src/test/results/clientpositive/windowing_windowspec3.q.out @@ -0,0 +1,84 @@ +PREHOOK: query: -- Test value based windowing spec + +drop table if exists emp +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Test value based windowing spec + +drop table if exists emp +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table emp(empno smallint, + ename varchar(10), + job varchar(10), + manager smallint, + hiredate date, + hirets timestamp, + salary double, + bonus double, + deptno tinyint) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@emp +POSTHOOK: query: create table emp(empno smallint, + ename varchar(10), + job varchar(10), + manager smallint, + hiredate date, + hirets timestamp, + salary double, + bonus double, + deptno tinyint) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@emp +PREHOOK: query: load data local inpath '../../data/files/emp2.txt' into table emp +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@emp +POSTHOOK: query: load data local inpath '../../data/files/emp2.txt' into table emp +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@emp +PREHOOK: query: -- Support date datatype +select deptno, empno, hiredate, salary, + sum(salary) over (partition by deptno order by hiredate range 90 preceding), + sum(salary) over (partition by deptno order by hiredate range between 90 preceding and 90 following), + sum(salary) over (partition by deptno order by hiredate range between 90 preceding and 10 preceding), + sum(salary) over (partition by deptno order by hiredate range between 10 following and 90 following), + sum(salary) over (partition by deptno order by hiredate range between 10 following and unbounded following), + sum(salary) over (partition by deptno order by hiredate range between unbounded preceding and 10 following) +from emp +PREHOOK: type: QUERY +PREHOOK: Input: default@emp +#### A masked pattern was here #### +POSTHOOK: query: -- Support date datatype +select deptno, empno, hiredate, salary, + sum(salary) over (partition by deptno order by hiredate range 90 preceding), + sum(salary) over (partition by deptno order by hiredate range between 90 preceding and 90 following), + sum(salary) over (partition by deptno order by hiredate range between 90 preceding and 10 preceding), + sum(salary) over (partition by deptno order by hiredate range between 10 following and 90 following), + sum(salary) over (partition by deptno order by hiredate range between 10 following and unbounded following), + sum(salary) over (partition by deptno order by hiredate range between unbounded preceding and 10 following) +from emp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp +#### A masked pattern was here #### +10 7988 NULL 1500.0 3000.0 3000.0 NULL NULL 8750.0 3000.0 +10 7987 NULL 1500.0 3000.0 3000.0 NULL NULL 8750.0 3000.0 +10 7782 1981-06-09 2450.0 2450.0 2450.0 NULL NULL 6300.0 5450.0 +10 7839 1981-11-17 5000.0 5000.0 6300.0 NULL 1300.0 1300.0 10450.0 +10 7934 1982-01-23 1300.0 6300.0 6300.0 5000.0 NULL NULL 11750.0 +20 7369 1980-12-17 800.0 800.0 800.0 NULL NULL 10075.0 800.0 +20 7566 1981-04-02 2975.0 2975.0 2975.0 NULL NULL 7100.0 3775.0 +20 7902 1981-12-03 3000.0 3000.0 3000.0 NULL NULL 4100.0 6775.0 +20 7788 1982-12-09 3000.0 3000.0 4100.0 NULL 1100.0 1100.0 9775.0 +20 7876 1983-01-12 1100.0 4100.0 4100.0 3000.0 NULL NULL 10875.0 +30 7499 1981-02-20 1600.0 1600.0 5700.0 NULL 2850.0 6550.0 2850.0 +30 7521 1981-02-22 1250.0 2850.0 5700.0 NULL 2850.0 6550.0 2850.0 +30 7698 1981-05-01 2850.0 5700.0 5700.0 2850.0 NULL 3700.0 5700.0 +30 7844 1981-09-08 1500.0 1500.0 3700.0 NULL 2200.0 2200.0 7200.0 +30 7654 1981-09-28 1250.0 2750.0 3700.0 1500.0 950.0 950.0 8450.0 +30 7900 1981-12-03 950.0 3700.0 3700.0 2750.0 NULL NULL 9400.0