diff --git a/data/files/emp2.txt b/data/files/emp2.txt index 650aff7..40ca64a 100644 --- a/data/files/emp2.txt +++ b/data/files/emp2.txt @@ -1,16 +1,16 @@ -7369|SMITH|CLERK|7902|1980-12-17|1980-12-17 00:00:00|800|NULL|20 -7499|ALLEN|SALESMAN|7698|1981-02-20|1981-02-20 00:00:00|1600|300|30 -7521|WARD|SALESMAN|7698|1981-02-22|1981-02-22 00:00:00|1250|500|30 -7566|JONES|MANAGER|7839|1981-04-02|1981-04-02 00:00:00|2975|NULL|20 -7654|MARTIN|SALESMAN|7698|1981-09-28|1981-09-28 00:00:00|1250|1400|30 -7698|BLAKE|MANAGER|7839|1981-05-01|1981-05-01 00:00:00|2850|NULL|30 -7782|CLARK|MANAGER|7839|1981-06-09|1981-06-09 00:00:00|2450|NULL|10 -7788|SCOTT|ANALYST|7566|1982-12-09|1982-12-09 00:00:00|3000|NULL|20 -7839|KING|PRESIDENT|NULL|1981-11-17|1981-11-17 00:00:00|5000|NULL|10 -7844|TURNER|SALESMAN|7698|1981-09-08|1981-09-08 00:00:00|1500|0|30 -7876|ADAMS|CLERK|7788|1983-01-12|1983-01-12 00:00:00|1100|NULL|20 -7900|JAMES|CLERK|7698|1981-12-03|1981-12-03 00:00:00|950|NULL|30 -7902|FORD|ANALYST|7566|1981-12-03|1981-12-03 00:00:00|3000|NULL|20 -7934|MILLER|CLERK|7782|1982-01-23|1982-01-23 00:00:00|1300|NULL|10 -7988|KATY|ANALYST|7566|NULL|NULL|1500|NULL|10 -7987|JULIA|ANALYST|7566|NULL|NULL|1500|NULL|10 +7369|SMITH|CLERK|7902|1980-12-17|1980-12-17 00:00:00|800|NULL|NULL|20 +7499|ALLEN|SALESMAN|7698|1981-02-20|1981-02-20 00:00:00|1600|300|200.50|30 +7521|WARD|SALESMAN|7698|1981-02-22|1981-02-22 00:00:00|1250|500|300.50|30 +7566|JONES|MANAGER|7839|1981-04-02|1981-04-02 00:00:00|2975|NULL|100.0|20 +7654|MARTIN|SALESMAN|7698|1981-09-28|1981-09-28 00:00:00|1250|1400|500.0|30 +7698|BLAKE|MANAGER|7839|1981-05-01|1981-05-01 00:00:00|2850|NULL|NULL|30 +7782|CLARK|MANAGER|7839|1981-06-09|1981-06-09 00:00:00|2450|NULL|50.0|10 +7788|SCOTT|ANALYST|7566|1982-12-09|1982-12-09 00:00:00|3000|NULL|NULL|20 +7839|KING|PRESIDENT|NULL|1981-11-17|1981-11-17 00:00:00|5000|NULL|NULL|10 +7844|TURNER|SALESMAN|7698|1981-09-08|1981-09-08 00:00:00|1500|0|300.0|30 +7876|ADAMS|CLERK|7788|1983-01-12|1983-01-12 00:00:00|1100|NULL|NULL|20 +7900|JAMES|CLERK|7698|1981-12-03|1981-12-03 00:00:00|950|NULL|NULL|30 +7902|FORD|ANALYST|7566|1981-12-03|1981-12-03 00:00:00|3000|NULL|NULL|20 +7934|MILLER|CLERK|7782|1982-01-23|1982-01-23 00:00:00|1300|NULL|100.0|10 +7988|KATY|ANALYST|7566|NULL|NULL|1500|NULL|200.0|10 +7987|JULIA|ANALYST|7566|NULL|NULL|1500|NULL|150.50|10 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java index 32471f2..63d35ae 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/ptf/WindowingTableFunction.java @@ -865,7 +865,7 @@ protected int computeStartPreceding(int rowIdx, PTFPartition p) throws HiveExcep // Use Case 4. if ( order == Order.DESC ) { - while (r >= 0 && !isGreater(rowVal, sortKey, amt) ) { + while (r >= 0 && !isDistanceGreater(rowVal, sortKey, amt) ) { r--; if ( r >= 0 ) { rowVal = computeValue(p.getAt(r)); @@ -874,7 +874,7 @@ protected int computeStartPreceding(int rowIdx, PTFPartition p) throws HiveExcep return r + 1; } else { // Use Case 5. - while (r >= 0 && !isGreater(sortKey, rowVal, amt) ) { + while (r >= 0 && !isDistanceGreater(sortKey, rowVal, amt) ) { r--; if ( r >= 0 ) { rowVal = computeValue(p.getAt(r)); @@ -936,7 +936,7 @@ protected int computeStartFollowing(int rowIdx, PTFPartition p) throws HiveExcep // Use Case 11. if ( order == Order.DESC) { - while (r < p.size() && !isGreater(sortKey, rowVal, amt) ) { + while (r < p.size() && !isDistanceGreater(sortKey, rowVal, amt) ) { r++; if ( r < p.size() ) { rowVal = computeValue(p.getAt(r)); @@ -945,7 +945,7 @@ protected int computeStartFollowing(int rowIdx, PTFPartition p) throws HiveExcep return r; } else { // Use Case 12. - while (r < p.size() && !isGreater(rowVal, sortKey, amt) ) { + while (r < p.size() && !isDistanceGreater(rowVal, sortKey, amt) ) { r++; if ( r < p.size() ) { rowVal = computeValue(p.getAt(r)); @@ -1021,7 +1021,7 @@ protected int computeEndPreceding(int rowIdx, PTFPartition p) throws HiveExcepti // Use Case 4. if ( order == Order.DESC ) { - while (r >= 0 && !isGreater(rowVal, sortKey, amt) ) { + while (r >= 0 && !isDistanceGreater(rowVal, sortKey, amt) ) { r--; if ( r >= 0 ) { rowVal = computeValue(p.getAt(r)); @@ -1030,7 +1030,7 @@ protected int computeEndPreceding(int rowIdx, PTFPartition p) throws HiveExcepti return r + 1; } else { // Use Case 5. - while (r >= 0 && !isGreater(sortKey, rowVal, amt) ) { + while (r >= 0 && !isDistanceGreater(sortKey, rowVal, amt) ) { r--; if ( r >= 0 ) { rowVal = computeValue(p.getAt(r)); @@ -1097,7 +1097,7 @@ protected int computeEndFollowing(int rowIdx, PTFPartition p) throws HiveExcepti // Use Case 11. if ( order == Order.DESC) { - while (r < p.size() && !isGreater(sortKey, rowVal, amt) ) { + while (r < p.size() && !isDistanceGreater(sortKey, rowVal, amt) ) { r++; if ( r < p.size() ) { rowVal = computeValue(p.getAt(r)); @@ -1106,7 +1106,7 @@ protected int computeEndFollowing(int rowIdx, PTFPartition p) throws HiveExcepti return r; } else { // Use Case 12. - while (r < p.size() && !isGreater(rowVal, sortKey, amt) ) { + while (r < p.size() && !isDistanceGreater(rowVal, sortKey, amt) ) { r++; if ( r < p.size() ) { rowVal = computeValue(p.getAt(r)); @@ -1121,8 +1121,16 @@ public Object computeValue(Object row) throws HiveException { return ObjectInspectorUtils.copyToStandardObject(o, expressionDef.getOI()); } - public abstract boolean isGreater(Object v1, Object v2, int amt); + /** + * Checks if the distance of v2 to v1 is greater than the given amt. + * @return True if the value of v1 - v2 is greater than amt or either value is null. + */ + public abstract boolean isDistanceGreater(Object v1, Object v2, int amt); + /** + * Checks if the values of v1 or v2 are the same. + * @return True if both values are the same or both are nulls. + */ public abstract boolean isEqual(Object v1, Object v2); @@ -1160,7 +1168,7 @@ public LongValueBoundaryScanner(BoundaryDef bndDef, Order order, } @Override - public boolean isGreater(Object v1, Object v2, int amt) { + public boolean isDistanceGreater(Object v1, Object v2, int amt) { if (v1 != null && v2 != null) { long l1 = PrimitiveObjectInspectorUtils.getLong(v1, (PrimitiveObjectInspector) expressionDef.getOI()); @@ -1193,7 +1201,7 @@ public DoubleValueBoundaryScanner(BoundaryDef bndDef, Order order, } @Override - public boolean isGreater(Object v1, Object v2, int amt) { + public boolean isDistanceGreater(Object v1, Object v2, int amt) { if (v1 != null && v2 != null) { double d1 = PrimitiveObjectInspectorUtils.getDouble(v1, (PrimitiveObjectInspector) expressionDef.getOI()); @@ -1226,15 +1234,16 @@ public HiveDecimalValueBoundaryScanner(BoundaryDef bndDef, Order order, } @Override - public boolean isGreater(Object v1, Object v2, int amt) { + public boolean isDistanceGreater(Object v1, Object v2, int amt) { HiveDecimal d1 = PrimitiveObjectInspectorUtils.getHiveDecimal(v1, (PrimitiveObjectInspector) expressionDef.getOI()); HiveDecimal d2 = PrimitiveObjectInspectorUtils.getHiveDecimal(v2, (PrimitiveObjectInspector) expressionDef.getOI()); - if ( d1 == null || d2 == null ) { - return false; + if ( d1 != null && d2 != null ) { + return d1.subtract(d2).intValue() > amt; } - return d1.subtract(d2).intValue() > amt; + + return d1 != null || d2 != null; // True if only one value is null } @Override @@ -1243,10 +1252,11 @@ public boolean isEqual(Object v1, Object v2) { (PrimitiveObjectInspector) expressionDef.getOI()); HiveDecimal d2 = PrimitiveObjectInspectorUtils.getHiveDecimal(v2, (PrimitiveObjectInspector) expressionDef.getOI()); - if ( d1 == null || d2 == null ) { - return false; + if ( d1 != null && d2 != null ) { + return d1.equals(d2); } - return d1.equals(d2); + + return d1 == null && d2 == null; // True if both are null } } @@ -1257,7 +1267,7 @@ public DateValueBoundaryScanner(BoundaryDef bndDef, Order order, } @Override - public boolean isGreater(Object v1, Object v2, int amt) { + public boolean isDistanceGreater(Object v1, Object v2, int amt) { Date l1 = PrimitiveObjectInspectorUtils.getDate(v1, (PrimitiveObjectInspector) expressionDef.getOI()); Date l2 = PrimitiveObjectInspectorUtils.getDate(v2, @@ -1285,7 +1295,7 @@ public StringValueBoundaryScanner(BoundaryDef bndDef, Order order, } @Override - public boolean isGreater(Object v1, Object v2, int amt) { + public boolean isDistanceGreater(Object v1, Object v2, int amt) { String s1 = PrimitiveObjectInspectorUtils.getString(v1, (PrimitiveObjectInspector) expressionDef.getOI()); String s2 = PrimitiveObjectInspectorUtils.getString(v2, diff --git a/ql/src/test/queries/clientpositive/windowing_windowspec3.q b/ql/src/test/queries/clientpositive/windowing_windowspec3.q index 608a6cf..aee0076 100644 --- a/ql/src/test/queries/clientpositive/windowing_windowspec3.q +++ b/ql/src/test/queries/clientpositive/windowing_windowspec3.q @@ -10,6 +10,7 @@ create table emp(empno smallint, hirets timestamp, salary double, bonus double, + stock decimal(10,2), deptno tinyint) row format delimited fields terminated by '|'; @@ -48,4 +49,14 @@ select deptno, empno, bonus, avg(bonus) over (partition by deptno order by bonus range between 100 following and 200 following), avg(bonus) over (partition by deptno order by bonus range between 200 following and unbounded following), avg(bonus) over (partition by deptno order by bonus range between unbounded preceding and 200 following) -from emp; \ No newline at end of file +from emp; + +-- Support Decimal datatype +select deptno, empno, stock, salary, + avg(salary) over (partition by deptno order by stock range 200 preceding), + avg(salary) over (partition by deptno order by stock range between 200 preceding and 200 following), + avg(salary) over (partition by deptno order by stock range between 200 preceding and 100 preceding), + avg(salary) over (partition by deptno order by stock range between 100 following and 200 following), + avg(salary) over (partition by deptno order by stock range between 200 following and unbounded following), + avg(salary) over (partition by deptno order by stock range between unbounded preceding and 200 following) +from emp; diff --git a/ql/src/test/results/clientpositive/windowing_windowspec3.q.out b/ql/src/test/results/clientpositive/windowing_windowspec3.q.out index 42c042f..aeb5adc 100644 --- a/ql/src/test/results/clientpositive/windowing_windowspec3.q.out +++ b/ql/src/test/results/clientpositive/windowing_windowspec3.q.out @@ -14,6 +14,7 @@ PREHOOK: query: create table emp(empno smallint, hirets timestamp, salary double, bonus double, + stock decimal(10,2), deptno tinyint) row format delimited fields terminated by '|' @@ -28,6 +29,7 @@ POSTHOOK: query: create table emp(empno smallint, hirets timestamp, salary double, bonus double, + stock decimal(10,2), deptno tinyint) row format delimited fields terminated by '|' @@ -188,3 +190,43 @@ POSTHOOK: Input: default@emp 30 7499 300.0 300.0 400.0 NULL 500.0 1400.0 266.6666666666667 30 7521 500.0 400.0 400.0 300.0 NULL 1400.0 266.6666666666667 30 7654 1400.0 1400.0 1400.0 NULL NULL NULL 550.0 +PREHOOK: query: -- Support Decimal datatype +select deptno, empno, stock, salary, + avg(salary) over (partition by deptno order by stock range 200 preceding), + avg(salary) over (partition by deptno order by stock range between 200 preceding and 200 following), + avg(salary) over (partition by deptno order by stock range between 200 preceding and 100 preceding), + avg(salary) over (partition by deptno order by stock range between 100 following and 200 following), + avg(salary) over (partition by deptno order by stock range between 200 following and unbounded following), + avg(salary) over (partition by deptno order by stock range between unbounded preceding and 200 following) +from emp +PREHOOK: type: QUERY +PREHOOK: Input: default@emp +#### A masked pattern was here #### +POSTHOOK: query: -- Support Decimal datatype +select deptno, empno, stock, salary, + avg(salary) over (partition by deptno order by stock range 200 preceding), + avg(salary) over (partition by deptno order by stock range between 200 preceding and 200 following), + avg(salary) over (partition by deptno order by stock range between 200 preceding and 100 preceding), + avg(salary) over (partition by deptno order by stock range between 100 following and 200 following), + avg(salary) over (partition by deptno order by stock range between 200 following and unbounded following), + avg(salary) over (partition by deptno order by stock range between unbounded preceding and 200 following) +from emp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@emp +#### A masked pattern was here #### +10 7839 NULL 5000.0 5000.0 5000.0 NULL NULL 1687.5 5000.0 +10 7782 50 2450.0 2450.0 1687.5 NULL 1500.0 NULL 2350.0 +10 7934 100 1300.0 1875.0 1687.5 NULL NULL NULL 2350.0 +10 7987 150.5 1500.0 1750.0 1687.5 NULL NULL NULL 2350.0 +10 7988 200 1500.0 1687.5 1687.5 2450.0 NULL NULL 2350.0 +20 7788 NULL 3000.0 1975.0 1975.0 NULL NULL 2975.0 1975.0 +20 7902 NULL 3000.0 1975.0 1975.0 NULL NULL 2975.0 1975.0 +20 7876 NULL 1100.0 1975.0 1975.0 NULL NULL 2975.0 1975.0 +20 7369 NULL 800.0 1975.0 1975.0 NULL NULL 2975.0 1975.0 +20 7566 100 2975.0 2975.0 2975.0 NULL NULL NULL 2175.0 +30 7900 NULL 950.0 1900.0 1900.0 NULL NULL 1400.0 1900.0 +30 7698 NULL 2850.0 1900.0 1900.0 NULL NULL 1400.0 1900.0 +30 7499 200.5 1600.0 1600.0 1450.0 NULL NULL 1250.0 1630.0 +30 7844 300 1500.0 1550.0 1400.0 NULL 1250.0 NULL 1566.6666666666667 +30 7521 300.5 1250.0 1450.0 1400.0 NULL 1250.0 NULL 1566.6666666666667 +30 7654 500 1250.0 1333.3333333333333 1333.3333333333333 1375.0 NULL NULL 1566.6666666666667