diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java index caacf3d..be717a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColEqualLongScalar.java @@ -91,10 +91,10 @@ public void evaluate(VectorizedRowBatch batch) { } outputColVector.isRepeating = true; } else if (batch.selectedInUse) { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] == value ? 1 : 0; + outNulls[i] = nullPos[i]; } } else { System.arraycopy(nullPos, 0, outNulls, 0, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java index 433162c..cd8d723 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterEqualLongScalar.java @@ -91,10 +91,10 @@ public void evaluate(VectorizedRowBatch batch) { } outputColVector.isRepeating = true; } else if (batch.selectedInUse) { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] >= value ? 1 : 0; + outNulls[i] = nullPos[i]; } } else { System.arraycopy(nullPos, 0, outNulls, 0, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java index 088b265..16148f3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColGreaterLongScalar.java @@ -91,10 +91,10 @@ public void evaluate(VectorizedRowBatch batch) { } outputColVector.isRepeating = true; } else if (batch.selectedInUse) { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] > value ? 1 : 0; + outNulls[i] = nullPos[i]; } } else { System.arraycopy(nullPos, 0, outNulls, 0, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java index 42bd807..927856f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessEqualLongScalar.java @@ -91,10 +91,10 @@ public void evaluate(VectorizedRowBatch batch) { } outputColVector.isRepeating = true; } else if (batch.selectedInUse) { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] <= value ? 1 : 0; + outNulls[i] = nullPos[i]; } } else { System.arraycopy(nullPos, 0, outNulls, 0, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java index 67fb47e..ac10a83 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColLessLongScalar.java @@ -91,10 +91,10 @@ public void evaluate(VectorizedRowBatch batch) { } outputColVector.isRepeating = true; } else if (batch.selectedInUse) { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] < value ? 1 : 0; + outNulls[i] = nullPos[i]; } } else { System.arraycopy(nullPos, 0, outNulls, 0, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java index 5352186..789ca3e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongColNotEqualLongScalar.java @@ -91,10 +91,10 @@ public void evaluate(VectorizedRowBatch batch) { } outputColVector.isRepeating = true; } else if (batch.selectedInUse) { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = vector[i] != value ? 1 : 0; + outNulls[i] = nullPos[i]; } } else { System.arraycopy(nullPos, 0, outNulls, 0, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java index 98fd0e8..09d845c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarEqualLongColumn.java @@ -91,10 +91,10 @@ public void evaluate(VectorizedRowBatch batch) { } outputColVector.isRepeating = true; } else if (batch.selectedInUse) { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value == vector[i] ? 1 : 0; + outNulls[i] = nullPos[i]; } } else { System.arraycopy(nullPos, 0, outNulls, 0, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java index 12a1c7f..afc80eb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterEqualLongColumn.java @@ -91,10 +91,10 @@ public void evaluate(VectorizedRowBatch batch) { } outputColVector.isRepeating = true; } else if (batch.selectedInUse) { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value >= vector[i] ? 1 : 0; + outNulls[i] = nullPos[i]; } } else { System.arraycopy(nullPos, 0, outNulls, 0, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java index 1fe2ec3..e2e871d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarGreaterLongColumn.java @@ -91,10 +91,10 @@ public void evaluate(VectorizedRowBatch batch) { } outputColVector.isRepeating = true; } else if (batch.selectedInUse) { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value > vector[i] ? 1 : 0; + outNulls[i] = nullPos[i]; } } else { System.arraycopy(nullPos, 0, outNulls, 0, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java index af3a4ab..ae675ed 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessEqualLongColumn.java @@ -91,10 +91,10 @@ public void evaluate(VectorizedRowBatch batch) { } outputColVector.isRepeating = true; } else if (batch.selectedInUse) { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value <= vector[i] ? 1 : 0; + outNulls[i] = nullPos[i]; } } else { System.arraycopy(nullPos, 0, outNulls, 0, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java index 0963367..c664e35 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarLessLongColumn.java @@ -91,10 +91,10 @@ public void evaluate(VectorizedRowBatch batch) { } outputColVector.isRepeating = true; } else if (batch.selectedInUse) { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value < vector[i] ? 1 : 0; + outNulls[i] = nullPos[i]; } } else { System.arraycopy(nullPos, 0, outNulls, 0, n); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java index 33ad3c2..776ab9f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/LongScalarNotEqualLongColumn.java @@ -91,10 +91,10 @@ public void evaluate(VectorizedRowBatch batch) { } outputColVector.isRepeating = true; } else if (batch.selectedInUse) { - System.arraycopy(nullPos, 0, outNulls, 0, n); for(int j=0; j != n; j++) { int i = sel[j]; outputVector[i] = value != vector[i] ? 1 : 0; + outNulls[i] = nullPos[i]; } } else { System.arraycopy(nullPos, 0, outNulls, 0, n); diff --git ql/src/test/queries/clientpositive/vector_number_compare_projection.q ql/src/test/queries/clientpositive/vector_number_compare_projection.q new file mode 100644 index 0000000..feb5e98 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_number_compare_projection.q @@ -0,0 +1,192 @@ +set hive.cli.print.header=true; +set hive.explain.user=false; +SET hive.auto.convert.join=true; +set hive.fetch.task.conversion=none; +set hive.mapred.mode=nonstrict; + +-- SORT_QUERY_RESULTS + +create table vectortab2k( + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + dc decimal(38,18), + bo boolean, + s string, + s2 string, + ts timestamp, + ts2 timestamp, + dt date) +ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../../data/files/vectortab2k' OVERWRITE INTO TABLE vectortab2k; + +CREATE TABLE scratch AS SELECT t, si, i, b, f, d, dc FROM vectortab2k; +INSERT INTO TABLE scratch VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL); + +CREATE TABLE vectortab2k_orc STORED AS ORC AS SELECT * FROM scratch; + +SET hive.vectorized.execution.enabled=true; + +-- +-- Projection LongColLongScalar +-- +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc + order by t, si, i) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc + order by t, si, i) as q; + +EXPLAIN +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc + order by t, si, i, b) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc + order by t, si, i, b) as q; + +-- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (t < 0) as compare1, (si <= 0) as compare2, (i = 0) as compare3 from vectortab2k_orc + where pmod(t, 4) > 1 + order by t, si, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (t > 0) as compare1, (si >= 0) as compare2, (i != 0) as compare3, (b > 0) as compare4 from vectortab2k_orc + where pmod(t, 4) < 2 + order by t, si, i, b) as q; + + +-- +-- Projection LongScalarLongColumn +-- +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (0 < t) as compare1, (0 <= si) as compare2, (0 = i) as compare3 from vectortab2k_orc + order by t, si, i) as q; + +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (0 > t) as compare1, (0 >= si) as compare2, (0 != i) as compare3, (0 > b) as compare4 from vectortab2k_orc + order by t, si, i, b) as q; + +-- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t, si, i, (0 < t) as compare1, (0 <= si) as compare2, (0 = i) as compare3 from vectortab2k_orc + where pmod(t, 4) > 1 + order by t, si, i) as q; +SELECT sum(hash(*)) FROM + (SELECT t, si, i, b, (0 > t) as compare1, (0 >= si) as compare2, (0 != i) as compare3, (0 > b) as compare4 from vectortab2k_orc + where pmod(t, 4) < 2 + order by t, si, i, b) as q; + +SET hive.vectorized.execution.enabled=false; + +CREATE TABLE scratch_repeat AS SELECT t, si, i, b, bo, 20 as t_repeat, + 9000 as si_repeat, 9233320 as i_repeat, -823823999339992 as b_repeat, false as bo_repeat_false, true as bo_repeat_true FROM vectortab2k; + +-- The repeated columns ought to create repeated VectorizedRowBatch for those columns. +-- And then when we do a comparison, we should generate a repeated boolean result. +CREATE TABLE vectortab2k_orc_repeat STORED AS ORC AS SELECT * FROM scratch_repeat; + +SET hive.vectorized.execution.enabled=true; + +-- +-- Projection LongColLongScalar +-- +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i_repeat, (t_repeat > 0) as compare1, (si_repeat >= 0) as compare2, (i_repeat = 0) as compare3 from vectortab2k_orc_repeat + order by t_repeat, si_repeat, i_repeat) as q; + +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i_repeat, (t_repeat < 0) as compare1, (si_repeat <=0) as compare2, (i_repeat != 0) as compare3 from vectortab2k_orc_repeat + order by t_repeat, si_repeat, i_repeat) as q; + +-- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i_repeat, (t_repeat > 0) as compare1, (si_repeat >= 0) as compare2, (i_repeat = 0) as compare3 from vectortab2k_orc_repeat + where pmod(si, 4) = 0 + order by t_repeat, si_repeat, i_repeat) as q; +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i_repeat, (t_repeat < 0) as compare1, (si_repeat <=0) as compare2, (i_repeat != 0) as compare3 from vectortab2k_orc_repeat + where pmod(si, 4) = 3 + order by t_repeat, si_repeat, i_repeat) as q; + +-- +-- Projection LongScalarLongColumn +-- +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i_repeat, (0 > t_repeat) as compare1, (0 >= si_repeat) as compare2, (0 = i_repeat) as compare3 from vectortab2k_orc_repeat + order by t_repeat, si_repeat, i_repeat) as q; + +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i_repeat, (0 < t_repeat) as compare1, (0 <= si_repeat) as compare2, (0 != i_repeat) as compare3 from vectortab2k_orc_repeat + order by t_repeat, si_repeat, i_repeat) as q; + +-- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i_repeat, (0 > t_repeat) as compare1, (0 >= si_repeat) as compare2, (0 = i_repeat) as compare3 from vectortab2k_orc_repeat + where pmod(si, 4) = 0 + order by t_repeat, si_repeat, i_repeat) as q; +SELECT sum(hash(*)) FROM + (SELECT t_repeat, si_repeat, i_repeat, (0 < t_repeat) as compare1, (0 <= si_repeat) as compare2, (0 != i_repeat) as compare3 from vectortab2k_orc_repeat + where pmod(si, 4) = 3 + order by t_repeat, si_repeat, i_repeat) as q; + +SET hive.vectorized.execution.enabled=false; + +CREATE TABLE scratch_null AS SELECT t, si, i, b, bo, + cast(null as tinyint) as t_null, cast(null as smallint) as si_null, cast(null as int) as i_null, cast(null as bigint) as b_null, cast(null as boolean) as bo_null FROM vectortab2k; + +-- The nulled columns ought to create repeated null VectorizedRowBatch for those columns. +CREATE TABLE vectortab2k_orc_null STORED AS ORC AS SELECT * FROM scratch_null; + +SET hive.vectorized.execution.enabled=true; + +-- +-- Projection LongColLongScalar +-- +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null > 0) as compare1, (si_null >= 0) as compare2, (i_null = 0) as compare3 from vectortab2k_orc_null + order by t_null, si_null, i_null) as q; + +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0) as compare1, (si_null <=0) as compare2, (i_null != 0) as compare3 from vectortab2k_orc_null + order by t_null, si_null, i_null) as q; + +-- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null > 0) as compare1, (si_null >= 0) as compare2, (i_null = 0) as compare3 from vectortab2k_orc_null + where pmod(si, 4) = 0 + order by t_null, si_null, i_null) as q; +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (t_null < 0) as compare1, (si_null <=0) as compare2, (i_null != 0) as compare3 from vectortab2k_orc_null + where pmod(si, 4) = 3 + order by t_null, si_null, i_null) as q; + +-- +-- Projection LongScalarLongColumn +-- +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (0 > t_null) as compare1, (0 >= si_null) as compare2, (0 = i_null) as compare3 from vectortab2k_orc_null + order by t_null, si_null, i_null) as q; + +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (0 < t_null) as compare1, (0 <= si_null) as compare2, (0 != i_null) as compare3 from vectortab2k_orc_null + order by t_null, si_null, i_null) as q; + +-- With some filtering +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (0 > t_null) as compare1, (0 >= si_null) as compare2, (0 = i_null) as compare3 from vectortab2k_orc_null + where pmod(si, 4) = 0 + order by t_null, si_null, i_null) as q; +SELECT sum(hash(*)) FROM + (SELECT t_null, si_null, i_null, (0 < t_null) as compare1, (0 <= si_null) as compare2, (0 != i_null) as compare3 from vectortab2k_orc_null + where pmod(si, 4) = 3 + order by t_null, si_null, i_null) as q; + diff --git ql/src/test/results/clientpositive/vector_number_compare_projection.q.out ql/src/test/results/clientpositive/vector_number_compare_projection.q.out index 8f5193f..85f7cc8 100644 --- ql/src/test/results/clientpositive/vector_number_compare_projection.q.out +++ ql/src/test/results/clientpositive/vector_number_compare_projection.q.out @@ -137,6 +137,7 @@ STAGE PLANS: sort order: +++ Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean) + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,VALUE._col0,VALUE._col1,VALUE._col2) (type: int) @@ -206,6 +207,7 @@ STAGE PLANS: sort order: ++++ Statistics: Num rows: 2001 Data size: 273608 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean) + Execution mode: vectorized Reduce Operator Tree: Select Operator expressions: hash(KEY.reducesinkkey0,KEY.reducesinkkey1,KEY.reducesinkkey2,KEY.reducesinkkey3,VALUE._col0,VALUE._col1,VALUE._col2,VALUE._col3) (type: int)