diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
new file mode 100644
index 0000000..065646a
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColAndCol.java
@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Logical AND of two boolean columns held in LongColumnVectors, written to an
+ * output LongColumnVector. A value of 0 is treated as FALSE.
+ *
+ * NULLs follow SQL three-valued logic: FALSE AND NULL is FALSE, while TRUE AND
+ * NULL and NULL AND NULL are NULL.
+ */
+public class ColAndCol extends VectorExpression {
+  int colNum1;
+  int colNum2;
+  int outputColumn;
+
+  public ColAndCol(int colNum1, int colNum2, int outputColumn)
+  {
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+    this.outputColumn = outputColumn;
+  }
+
+  /**
+   * Evaluates any child expressions, then ANDs the two input columns for every
+   * row of the batch that is currently selected.
+   *
+   * @param batch batch holding the two input columns and the output column
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector in1 = (LongColumnVector) batch.cols[colNum1];
+    LongColumnVector in2 = (LongColumnVector) batch.cols[colNum2];
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+    int n = batch.size;
+    if (n <= 0) {
+      //Nothing to do
+      return;
+    }
+
+    // The output can only contain NULLs if at least one input can.
+    boolean mayHaveNulls = !in1.noNulls || !in2.noNulls;
+    outV.noNulls = !mayHaveNulls;
+
+    if (in1.isRepeating && in2.isRepeating) {
+      // Both inputs hold their single value in slot 0, so the output does too.
+      outV.isRepeating = true;
+      andRow(in1, in2, outV, 0, 0, 0, mayHaveNulls);
+      return;
+    }
+
+    // A repeating vector is only valid at index 0: a stride of 0 pins every read
+    // of that input to slot 0 while the other input advances row by row.
+    int stride1 = in1.isRepeating ? 0 : 1;
+    int stride2 = in2.isRepeating ? 0 : 1;
+    outV.isRepeating = false;
+    if (batch.selectedInUse) {
+      int[] sel = batch.selected;
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        andRow(in1, in2, outV, i * stride1, i * stride2, i, mayHaveNulls);
+      }
+    } else {
+      for (int i = 0; i != n; i++) {
+        andRow(in1, in2, outV, i * stride1, i * stride2, i, mayHaveNulls);
+      }
+    }
+  }
+
+  /**
+   * ANDs a single row: reads input 1 at i1 and input 2 at i2 and writes the
+   * value, and the NULL flag when mayHaveNulls is set, to slot o of out.
+   */
+  private static void andRow(LongColumnVector in1, LongColumnVector in2,
+      LongColumnVector out, int i1, int i2, int o, boolean mayHaveNulls) {
+    long v1 = in1.vector[i1];
+    long v2 = in2.vector[i2];
+    out.vector[o] = v1 & v2;
+    if (mayHaveNulls) {
+      // isNull is only consulted for an input that may contain NULLs.
+      boolean null1 = !in1.noNulls && in1.isNull[i1];
+      boolean null2 = !in2.noNulls && in2.isNull[i2];
+      // NULL unless a non-NULL FALSE operand already decides the result.
+      out.isNull[o] = (null1 && null2) || (null1 && v2 != 0) || (null2 && v1 != 0);
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java
new file mode 100644
index 0000000..17d209a
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/ColOrCol.java
@@ -0,0 +1,283 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Logical OR of two boolean columns held in LongColumnVectors, written to an
+ * output LongColumnVector. A value of 0 is treated as FALSE.
+ *
+ * When either input may contain NULLs, a row is flagged NULL if both operands
+ * are NULL, or if one operand is NULL and the other one's value is 0.
+ */
+public class ColOrCol extends VectorExpression {
+  int colNum1;
+  int colNum2;
+  int outputColumn;
+
+  public ColOrCol(int colNum1, int colNum2, int outputColumn)
+  {
+    this.colNum1 = colNum1;
+    this.colNum2 = colNum2;
+    this.outputColumn = outputColumn;
+  }
+
+  /**
+   * Evaluates any child expressions, then ORs the two input columns for every
+   * row of the batch that is currently selected.
+   *
+   * @param batch batch holding the two input columns and the output column
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector left = (LongColumnVector) batch.cols[colNum1];
+    LongColumnVector right = (LongColumnVector) batch.cols[colNum2];
+    LongColumnVector result = (LongColumnVector) batch.cols[outputColumn];
+    int rowCount = batch.size;
+    if (rowCount <= 0) {
+      //Nothing to do
+      return;
+    }
+
+    // NULL flags are only computed when at least one input may contain NULLs.
+    boolean nullsPossible = !(left.noNulls && right.noNulls);
+
+    if (left.isRepeating && right.isRepeating) {
+      // Both inputs live in slot 0 only, and so does the result.
+      result.isRepeating = true;
+      orRow(left, right, result, 0, 0, 0, nullsPossible);
+    } else {
+      // A step of 0 keeps reading slot 0 of a repeating input.
+      int leftStep = left.isRepeating ? 0 : 1;
+      int rightStep = right.isRepeating ? 0 : 1;
+      if (batch.selectedInUse) {
+        int[] rows = batch.selected;
+        for (int k = 0; k < rowCount; k++) {
+          int row = rows[k];
+          orRow(left, right, result, row * leftStep, row * rightStep, row, nullsPossible);
+        }
+      } else {
+        for (int row = 0; row < rowCount; row++) {
+          orRow(left, right, result, row * leftStep, row * rightStep, row, nullsPossible);
+        }
+      }
+      result.isRepeating = false;
+    }
+    result.noNulls = !nullsPossible;
+  }
+
+  /**
+   * ORs a single row: reads the left input at l and the right input at r and
+   * writes the value to slot o of result. The NULL flag of slot o is written
+   * only when nullsPossible is set.
+   */
+  private static void orRow(LongColumnVector left, LongColumnVector right,
+      LongColumnVector result, int l, int r, int o, boolean nullsPossible) {
+    result.vector[o] = left.vector[l] | right.vector[r];
+    if (nullsPossible) {
+      // The isNull array of an input flagged noNulls is never read.
+      boolean leftNull = !left.noNulls && left.isNull[l];
+      boolean rightNull = !right.noNulls && right.isNull[r];
+      result.isNull[o] = ((left.vector[l] == 0) && rightNull)
+          || (leftNull && (right.vector[r] == 0))
+          || (leftNull && rightNull);
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
new file mode 100644
index 0000000..b3f8ec1
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprAndExpr.java
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * This class represents a non leaf binary operator in the expression tree.
+ * It evaluates its two child expressions one after the other on the same
+ * batch. NOTE(review): presumably both children are filters that narrow the
+ * batch's selection, which is what makes the sequence behave as AND - confirm.
+ */
+public class FilterExprAndExpr extends VectorExpression {
+  VectorExpression childExpr1;
+  VectorExpression childExpr2;
+
+  public FilterExprAndExpr(VectorExpression childExpr1, VectorExpression childExpr2) {
+    this.childExpr1 = childExpr1;
+    this.childExpr2 = childExpr2;
+  }
+
+  /**
+   * Runs the first child and then the second child on the given batch.
+   *
+   * @param batch batch handed to both children, in order
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    // The second child sees the batch in whatever state the first one left it.
+    this.childExpr1.evaluate(batch);
+    this.childExpr2.evaluate(batch);
+  }
+
+  @Override
+  public int getOutputColumn() {
+    // This expression writes no column of its own.
+    return -1;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
new file mode 100644
index 0000000..704de4b
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterExprOrExpr.java
@@ -0,0 +1,106 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * This class represents an Or expression. This applies short circuit optimization.
+ * The second child is only evaluated over the rows the first child rejected.
+ * On return the batch selects the rows kept by either child, in the order in
+ * which they were selected on entry.
+ */
+public class FilterExprOrExpr extends VectorExpression {
+  VectorExpression childExpr1;
+  VectorExpression childExpr2;
+  // Scratch buffers, grown on demand: rows selected on entry, rows rejected by
+  // the first child, and a per-row mark (1 = kept by a child).
+  int [] tmpSelect1 = new int[VectorizedRowBatch.DEFAULT_SIZE];
+  int [] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+  int [] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE];
+
+  public FilterExprOrExpr(VectorExpression childExpr1, VectorExpression childExpr2) {
+    this.childExpr1 = childExpr1;
+    this.childExpr2 = childExpr2;
+  }
+
+  /**
+   * Filters the batch down to the rows accepted by at least one child.
+   *
+   * @param batch batch to filter; selected, selectedInUse and size are updated
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    int n = batch.size;
+    if (n <= 0) {
+      return;
+    }
+    boolean prevSelectInUse = batch.selectedInUse;
+
+    // The scratch buffers start at DEFAULT_SIZE; grow them for larger batches.
+    if (tmpSelect1.length < n) {
+      tmpSelect1 = new int[n];
+      unselected = new int[n];
+    }
+
+    //Clone the selected vector and remember the highest selected row index
+    int [] sel = batch.selected;
+    int maxRow = n - 1;
+    if (batch.selectedInUse) {
+      for (int j = 0; j < n; j++) {
+        tmpSelect1[j] = sel[j];
+        maxRow = Math.max(maxRow, sel[j]);
+      }
+    } else {
+      for (int i = 0; i < n; i++) {
+        tmpSelect1[i] = i;
+        sel[i] = i;
+      }
+      batch.selectedInUse = true;
+    }
+    if (tmp.length <= maxRow) {
+      tmp = new int[maxRow + 1];
+    }
+
+    childExpr1.evaluate(batch);
+
+    // Mark the rows the first child kept. batch.selected is read again here
+    // because a child is free to install a different array than the one in sel.
+    for (int i = 0; i <= maxRow; i++) {
+      tmp[i] = 0;
+    }
+    int [] keptByFirst = batch.selected;
+    int keptByFirstSize = batch.size;
+    for (int j = 0; j < keptByFirstSize; j++) {
+      tmp[keptByFirst[j]] = 1;
+    }
+
+    //Calculate unselected ones in last evaluate.
+    int unselectedSize = 0;
+    for (int j = 0; j < n; j++) {
+      int i = tmpSelect1[j];
+      if (tmp[i] == 0) {
+        unselected[unselectedSize++] = i;
+      }
+    }
+
+    //Evaluate second child expression over unselected ones only.
+    batch.selected = unselected;
+    batch.size = unselectedSize;
+    childExpr2.evaluate(batch);
+
+    // Mark the rows the second child kept, again from the array it left behind.
+    int [] keptBySecond = batch.selected;
+    int keptBySecondSize = batch.size;
+    for (int j = 0; j < keptBySecondSize; j++) {
+      tmp[keptBySecond[j]] = 1;
+    }
+
+    // Rebuild the selection in the array the batch came in with, walking the
+    // entry snapshot so the original row order is kept. newSize never exceeds
+    // n, which sel is known to hold, and no scratch buffer stays in the batch.
+    int newSize = 0;
+    for (int j = 0; j < n; j++) {
+      int i = tmpSelect1[j];
+      if (tmp[i] != 0) {
+        sel[newSize++] = i;
+      }
+    }
+    batch.selected = sel;
+    batch.size = newSize;
+    if (newSize == n) {
+      //Filter didn't do anything
+      batch.selectedInUse = prevSelectInUse;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return -1;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java
new file mode 100644
index 0000000..6f3b101
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FilterNotExpr.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * This class represents an NOT filter expression. This applies short circuit optimization.
+ * The child filter is run on the batch, and the batch's selection is then
+ * replaced by the rows that were selected on entry but rejected by the child.
+ * NOTE(review): a row the child drops because its predicate is NULL is selected
+ * here as well - confirm that this is acceptable for the callers.
+ */
+public class FilterNotExpr extends VectorExpression {
+  VectorExpression childExpr1;
+  // Scratch buffers, grown on demand: rows selected on entry, rows rejected by
+  // the child, and a per-row mark (1 = kept by the child).
+  int [] tmpSelect1 = new int[VectorizedRowBatch.DEFAULT_SIZE];
+  int [] unselected = new int[VectorizedRowBatch.DEFAULT_SIZE];
+  int [] tmp = new int[VectorizedRowBatch.DEFAULT_SIZE];
+
+  public FilterNotExpr(VectorExpression childExpr1) {
+    this.childExpr1 = childExpr1;
+  }
+
+  /**
+   * Inverts the child filter over the rows currently selected in the batch.
+   *
+   * @param batch batch to filter; selected, selectedInUse and size are updated
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    int n = batch.size;
+
+    if (n<=0) {
+      return;
+    }
+
+    // The scratch buffers start at DEFAULT_SIZE; grow them for larger batches.
+    if (tmpSelect1.length < n) {
+      tmpSelect1 = new int[n];
+      unselected = new int[n];
+    }
+
+    //Clone the selected vector and remember the highest selected row index
+    int [] sel = batch.selected;
+    int maxRow = n - 1;
+    if (batch.selectedInUse) {
+      for (int j = 0; j < n; j++) {
+        tmpSelect1[j] = sel[j];
+        maxRow = Math.max(maxRow, sel[j]);
+      }
+    } else {
+      for (int i = 0; i < n; i++) {
+        tmpSelect1[i] = i;
+        sel[i] = i;
+      }
+      batch.selectedInUse = true;
+    }
+    if (tmp.length <= maxRow) {
+      tmp = new int[maxRow + 1];
+    }
+
+    childExpr1.evaluate(batch);
+
+    // Mark the rows the child kept. batch.selected is read again here because
+    // a child is free to install a different array than the one held in sel.
+    for (int i = 0; i <= maxRow; i++) {
+      tmp[i] = 0;
+    }
+    int [] keptByChild = batch.selected;
+    int keptByChildSize = batch.size;
+    for (int j = 0; j < keptByChildSize; j++) {
+      tmp[keptByChild[j]] = 1;
+    }
+
+    //Calculate unselected ones in last evaluate.
+    // All marks are read before this loop writes, so it is safe even when
+    // keptByChild happens to be the unselected array itself.
+    int unselectedSize = 0;
+    for (int j = 0; j < n; j++) {
+      int i = tmpSelect1[j];
+      if (tmp[i] == 0) {
+        unselected[unselectedSize++] = i;
+      }
+    }
+
+    //The unselected is the new selected
+    batch.selected = unselected;
+    batch.size = unselectedSize;
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return -1;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java
new file mode 100644
index 0000000..0a1e3a1
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IdentityExpression.java
@@ -0,0 +1,47 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Expression that computes nothing itself: it only evaluates its child
+ * expressions and reports the column and type it was constructed with as its
+ * output.
+ */
+public class IdentityExpression extends VectorExpression {
+  int colNum;
+  String type;
+
+  public IdentityExpression(int colNum, String type) {
+    this.colNum = colNum;
+    this.type = type;
+  }
+
+  /**
+   * Evaluates the child expressions, if there are any; the batch is not
+   * touched otherwise.
+   *
+   * @param batch batch handed on to the child expressions
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    if (childExpressions == null) {
+      return;
+    }
+    this.evaluateChildren(batch);
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return colNum;
+  }
+
+  @Override
+  public String getOutputType() {
+    return type;
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
new file mode 100644
index 0000000..a2aadca
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNotNull.java
@@ -0,0 +1,89 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Writes 1 to the output column for every selected row whose input entry is
+ * not NULL and 0 for every row whose entry is NULL. The output itself never
+ * contains NULLs.
+ */
+public class IsNotNull extends VectorExpression {
+  int colNum;
+  int outputColumn;
+
+  public IsNotNull(int colNum, int outputColumn) {
+    this.colNum = colNum;
+    this.outputColumn = outputColumn;
+  }
+
+  /**
+   * Evaluates any child expressions, then fills the output column.
+   *
+   * @param batch batch holding the input column and the output column
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    ColumnVector inputColVector = batch.cols[colNum];
+    int[] sel = batch.selected;
+    boolean[] nullPos = inputColVector.isNull;
+    int n = batch.size;
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+    long[] outputVector = outV.vector;
+
+    if (n <= 0) {
+      //Nothing to do
+      return;
+    }
+
+    // output never has nulls for this operator
+    outV.noNulls = true;
+    if (inputColVector.isRepeating) {
+      // Slot 0 stands for every row. nullPos is only consulted when the input
+      // may contain NULLs at all.
+      outputVector[0] = (inputColVector.noNulls || !nullPos[0]) ? 1 : 0;
+      outV.isRepeating = true;
+    } else if (inputColVector.noNulls) {
+      // No NULLs in the input: every selected row is "not null".
+      if (batch.selectedInUse) {
+        for(int j=0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = 1;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = 1;
+        }
+      }
+      outV.isRepeating = false;
+    } else /* !inputColVector.isRepeating && !inputColVector.noNulls */ {
+      if (batch.selectedInUse) {
+        for(int j=0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = nullPos[i] ? 0 : 1;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = nullPos[i] ? 0 : 1;
+        }
+      }
+      outV.isRepeating = false;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
new file mode 100644
index 0000000..01f3a9c
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IsNull.java
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Writes 1 to the output column for every selected row whose input entry is
+ * NULL and 0 for every other row. The output itself never contains NULLs.
+ */
+public class IsNull extends VectorExpression {
+  int colNum;
+  int outputColumn;
+
+  public IsNull(int colNum, int outputColumn) {
+    this.colNum = colNum;
+    this.outputColumn = outputColumn;
+  }
+
+  /**
+   * Evaluates any child expressions, then fills the output column.
+   *
+   * @param batch batch holding the input column and the output column
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    ColumnVector inputColVector = batch.cols[colNum];
+    int[] sel = batch.selected;
+    //Note: if type of isNull could be long[], could we just re-use this
+    //vector as the output vector. No iterations would be needed.
+    boolean[] nullPos = inputColVector.isNull;
+    int n = batch.size;
+    ColumnVector outV = batch.cols[outputColumn];
+    long[] outputVector = ((LongColumnVector) outV).vector;
+    if (n <= 0) {
+      //Nothing to do
+      return;
+    }
+
+    // output never has nulls for this operator
+    outV.noNulls = true;
+    if (inputColVector.isRepeating) {
+      // Slot 0 stands for every row. nullPos is only consulted when the input
+      // may contain NULLs at all.
+      outputVector[0] = (!inputColVector.noNulls && nullPos[0]) ? 1 : 0;
+      outV.isRepeating = true;
+    } else if (inputColVector.noNulls) {
+      // No NULLs in the input: no selected row is NULL.
+      if (batch.selectedInUse) {
+        for(int j=0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = 0;
+        }
+      }
+      outV.isRepeating = false;
+    } else /* !inputColVector.isRepeating && !inputColVector.noNulls */ {
+      if (batch.selectedInUse) {
+        for(int j=0; j != n; j++) {
+          int i = sel[j];
+          outputVector[i] = nullPos[i] ? 1 : 0;
+        }
+      } else {
+        for(int i = 0; i != n; i++) {
+          outputVector[i] = nullPos[i] ? 1 : 0;
+        }
+      }
+      outV.isRepeating = false;
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    // evaluate() writes its result to this column, so report it.
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java
new file mode 100644
index 0000000..d60fca4
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/NotCol.java
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Logical NOT of a boolean column held in a LongColumnVector: the low order
+ * bit of every selected value is inverted and written to the output column.
+ * The NULL flag of each row is carried over from the input unchanged.
+ */
+public class NotCol extends VectorExpression {
+  int colNum;
+  int outputColumn;
+
+  public NotCol(int colNum, int outputColumn) {
+    this.colNum = colNum;
+    this.outputColumn = outputColumn;
+  }
+
+  /**
+   * Evaluates any child expressions, then negates the input column for every
+   * row of the batch that is currently selected.
+   *
+   * @param batch batch holding the input column and the output column
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector inputColVector = (LongColumnVector) batch.cols[colNum];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    long[] vector = inputColVector.vector;
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+    long[] outputVector = outV.vector;
+
+    if (n <= 0) {
+      //Nothing to do
+      return;
+    }
+
+    if (inputColVector.isRepeating) {
+      outV.isRepeating = true;
+      // mask out all but low order bit with "& 1" so NOT 1 yields 0, NOT 0 yields 1
+      outputVector[0] = ~vector[0] & 1;
+    } else if (batch.selectedInUse) {
+      for(int j=0; j != n; j++) {
+        int i = sel[j];
+        outputVector[i] = ~vector[i] & 1;
+      }
+      outV.isRepeating = false;
+    }
+    else {
+      for(int i = 0; i != n; i++) {
+        outputVector[i] = ~vector[i] & 1;
+      }
+      outV.isRepeating = false;
+    }
+
+    // handle NULLs
+    if (inputColVector.noNulls) {
+      outV.noNulls = true;
+    } else {
+      outV.noNulls = false;
+      if (inputColVector.isRepeating) {
+        outV.isNull[0] = inputColVector.isNull[0];
+      } else if (batch.selectedInUse) {
+        // Selected row indexes are not limited to 0..n-1, so the flags have to
+        // be copied row by row instead of as one leading block of n entries.
+        for(int j=0; j != n; j++) {
+          int i = sel[j];
+          outV.isNull[i] = inputColVector.isNull[i];
+        }
+      } else {
+        System.arraycopy(inputColVector.isNull, 0, outV.isNull, 0, n);
+      }
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java
new file mode 100644
index 0000000..ef64e4b
--- /dev/null
+++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsFalse.java
@@ -0,0 +1,120 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Filter expression that keeps only the rows whose boolean column (held as
+ * 0/1 longs in a LongColumnVector) is false: the value is 0 and not NULL.
+ *
+ * Rows are removed by compacting batch.selected and lowering batch.size; no
+ * output column is written.
+ */
+public class SelectColumnIsFalse extends VectorExpression {
+  int colNum1;
+
+  public SelectColumnIsFalse(int colNum1)
+  {
+    this.colNum1 = colNum1;
+  }
+
+  /**
+   * Narrows the batch to the rows where column colNum1 is false.
+   *
+   * The selection array is compacted in place: the write position never
+   * passes the read position, and batch.selected keeps its full capacity
+   * instead of being swapped for a shorter array of length batch.size.
+   *
+   * @param batch the batch to filter; selected, selectedInUse and size may
+   *              be modified
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    long[] vector1 = inputColVector1.vector;
+    boolean[] nullVector = inputColVector1.isNull;
+
+    if (n <= 0) {
+      // Nothing to do
+      return;
+    }
+
+    // isNull entries are only consulted when the vector may hold NULLs.
+    boolean checkNulls = !inputColVector1.noNulls;
+
+    if (inputColVector1.isRepeating) {
+      // Entry 0 stands for every row, so the batch is kept or dropped as a
+      // whole. noNulls == false alone does not make that entry NULL, so
+      // isNull[0] is tested; the value test matches the row-wise one below.
+      if (vector1[0] != 0 || (checkNulls && nullVector[0])) {
+        // All are filtered out
+        batch.size = 0;
+      }
+      return;
+    }
+
+    int newSize = 0;
+    if (batch.selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        if (vector1[i] == 0 && !(checkNulls && nullVector[i])) {
+          // newSize <= j here, so this never overwrites an unread entry.
+          sel[newSize++] = i;
+        }
+      }
+      batch.size = newSize;
+    } else {
+      for (int i = 0; i != n; i++) {
+        if (vector1[i] == 0 && !(checkNulls && nullVector[i])) {
+          sel[newSize++] = i;
+        }
+      }
+      // Switch to selection mode only if at least one row was dropped.
+      if (newSize < n) {
+        batch.selectedInUse = true;
+        batch.size = newSize;
+      }
+    }
+  }
+
+  /**
+   * @return -1, since this filter writes no output column
+   */
+  @Override
+  public int getOutputColumn() {
+    return -1;
+  }
+
+  /**
+   * @return the type name "boolean"
+   */
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java
new file mode 100644
index 0000000..2d929a8
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNotNull.java
@@ -0,0 +1,90 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Filter that keeps only the rows whose column value is not NULL. It narrows
+ * batch.selected / batch.size in place and writes no output column.
+ */
+public class SelectColumnIsNotNull extends VectorExpression {
+  int colNum;
+
+  public SelectColumnIsNotNull(int colNum) {
+    this.colNum = colNum;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    ColumnVector inputColVector = batch.cols[colNum];
+    int[] sel = batch.selected;
+    // Note: if type of isNull could be long[], could we just re-use this
+    // vector as the output vector. No iterations would be needed.
+    boolean[] nullPos = inputColVector.isNull;
+    int n = batch.size;
+    if (n <= 0 || inputColVector.noNulls) {
+      // Empty batch, or a column without NULLs: every row stays selected.
+      return;
+    }
+
+    if (inputColVector.isRepeating) {
+      // Entry 0 stands for all rows; noNulls == false alone does not make it NULL.
+      batch.size = nullPos[0] ? 0 : n;
+      return;
+    }
+
+    int newSize = 0;
+    if (batch.selectedInUse) {
+      // Compact in place: newSize never passes j, and sel keeps its capacity.
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        if (!nullPos[i]) {
+          sel[newSize++] = i;
+        }
+      }
+      batch.size = newSize;
+    } else {
+      for (int i = 0; i != n; i++) {
+        if (!nullPos[i]) {
+          sel[newSize++] = i;
+        }
+      }
+      if (newSize < n) {
+        batch.selectedInUse = true;
+        batch.size = newSize;
+      }
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return -1;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java
new file mode 100644
index 0000000..446f071
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsNull.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Filter that keeps only the rows whose column value is NULL. It narrows
+ * batch.selected / batch.size in place and writes no output column.
+ */
+public class SelectColumnIsNull extends VectorExpression {
+  int colNum;
+
+  public SelectColumnIsNull(int colNum) {
+    this.colNum = colNum;
+  }
+
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+    ColumnVector inputColVector = batch.cols[colNum];
+    int[] sel = batch.selected;
+    boolean[] nullPos = inputColVector.isNull;
+    int n = batch.size;
+    if (n <= 0) {
+      // Nothing to do
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      // No NULLs anywhere, so no row qualifies.
+      batch.size = 0;
+    } else if (inputColVector.isRepeating) {
+      // Entry 0 stands for all rows; keep them only if that entry is NULL.
+      batch.size = nullPos[0] ? n : 0;
+    } else if (batch.selectedInUse) {
+      // Compact in place: newSize never passes j, and sel keeps its capacity.
+      int newSize = 0;
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        if (nullPos[i]) {
+          sel[newSize++] = i;
+        }
+      }
+      batch.size = newSize;
+    } else {
+      int newSize = 0;
+      for (int i = 0; i != n; i++) {
+        if (nullPos[i]) {
+          sel[newSize++] = i;
+        }
+      }
+      if (newSize < n) {
+        batch.selectedInUse = true;
+        batch.size = newSize;
+      }
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return -1;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java
new file mode 100644
index 0000000..b1bce44
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/SelectColumnIsTrue.java
@@ -0,0 +1,117 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/**
+ * Filter expression that keeps only the rows whose boolean column (held as
+ * 0/1 longs in a LongColumnVector) is true: the value is 1 and not NULL.
+ *
+ * Rows are removed by compacting batch.selected and lowering batch.size; no
+ * output column is written.
+ */
+public class SelectColumnIsTrue extends VectorExpression {
+  int colNum1;
+
+  public SelectColumnIsTrue(int colNum1) {
+    this.colNum1 = colNum1;
+  }
+
+  /**
+   * Narrows the batch to the rows where column colNum1 is true.
+   *
+   * The selection array is compacted in place: the write position never
+   * passes the read position, and batch.selected keeps its full capacity
+   * instead of being swapped for a shorter array of length batch.size.
+   *
+   * @param batch the batch to filter; selected, selectedInUse and size may
+   *              be modified
+   */
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    LongColumnVector inputColVector1 = (LongColumnVector) batch.cols[colNum1];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    long[] vector1 = inputColVector1.vector;
+    boolean[] nullVector = inputColVector1.isNull;
+
+    if (n <= 0) {
+      // Nothing to do
+      return;
+    }
+
+    // isNull entries are only consulted when the vector may hold NULLs.
+    boolean checkNulls = !inputColVector1.noNulls;
+
+    if (inputColVector1.isRepeating) {
+      // Entry 0 stands for every row, so the batch is kept or dropped as a
+      // whole. noNulls == false alone does not make that entry NULL, so
+      // isNull[0] is tested; the value test matches the row-wise one below.
+      if (vector1[0] != 1 || (checkNulls && nullVector[0])) {
+        // All are filtered out
+        batch.size = 0;
+      }
+      return;
+    }
+
+    int newSize = 0;
+    if (batch.selectedInUse) {
+      for (int j = 0; j != n; j++) {
+        int i = sel[j];
+        if (vector1[i] == 1 && !(checkNulls && nullVector[i])) {
+          // newSize <= j here, so this never overwrites an unread entry.
+          sel[newSize++] = i;
+        }
+      }
+      batch.size = newSize;
+    } else {
+      for (int i = 0; i != n; i++) {
+        if (vector1[i] == 1 && !(checkNulls && nullVector[i])) {
+          sel[newSize++] = i;
+        }
+      }
+      // Switch to selection mode only if at least one row was dropped.
+      if (newSize < n) {
+        batch.selectedInUse = true;
+        batch.size = newSize;
+      }
+    }
+  }
+
+  /**
+   * @return -1, since this filter writes no output column
+   */
+  @Override
+  public int getOutputColumn() {
+    return -1;
+  }
+
+  /**
+   * @return the type name "boolean"
+   */
+  @Override
+  public String getOutputType() {
+    return "boolean";
+  }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
index 707e75f..1739976 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorFilterExpressions.java
@@ -24,6 +24,8 @@
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColEqualLongScalar;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongColumn;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColGreaterLongScalar;
+import org.junit.Assert;
 import org.junit.Test;
 
 public class TestVectorFilterExpressions {
@@ -65,4 +67,69 @@ public void testFilterLongColEqualLongColumn() {
     assertEquals(1, vrg.size);
     assertEquals(5, vrg.selected[0]);
   }
+
+  @Test
+  public void testColOpScalarNumericFilterNullAndRepeatingLogic()
+  {
+    // No nulls, not repeating
+    FilterLongColGreaterLongScalar f = new FilterLongColGreaterLongScalar(0, 1);
+    VectorizedRowBatch batch = this.getSimpleLongBatch();
+
+    batch.cols[0].noNulls = true;
+    batch.cols[0].isRepeating = false;
+    f.evaluate(batch);
+    // only last 2 rows qualify
+    Assert.assertEquals(2, batch.size);
+    // show that their positions are recorded
+    Assert.assertTrue(batch.selectedInUse);
+    Assert.assertEquals(2, batch.selected[0]);
+    Assert.assertEquals(3, batch.selected[1]);
+
+    // make everything qualify and ensure selected is not in use
+    f = new FilterLongColGreaterLongScalar(0, -1); // col > -1
+    batch = getSimpleLongBatch();
+    f.evaluate(batch);
+
Assert.assertFalse(batch.selectedInUse);
+    Assert.assertEquals(4, batch.size);
+
+    // has nulls, not repeating
+    batch = getSimpleLongBatch();
+    f = new FilterLongColGreaterLongScalar(0, 1); // col > 1
+    batch.cols[0].noNulls = false;
+    batch.cols[0].isRepeating = false;
+    batch.cols[0].isNull[3] = true;
+    f.evaluate(batch);
+    Assert.assertTrue(batch.selectedInUse);
+    Assert.assertEquals(1, batch.size);
+    Assert.assertEquals(2, batch.selected[0]);
+
+    // no nulls, is repeating
+    batch = getSimpleLongBatch();
+    f = new FilterLongColGreaterLongScalar(0, -1); // col > -1
+    batch.cols[0].noNulls = true;
+    batch.cols[0].isRepeating = true;
+    f.evaluate(batch);
+    Assert.assertFalse(batch.selectedInUse);
+    Assert.assertEquals(4, batch.size); // everything qualifies (4 rows, repeated value is vector[0] = 0)
+
+    // has nulls, is repeating
+    batch = getSimpleLongBatch();
+    batch.cols[0].noNulls = false;
+    batch.cols[0].isRepeating = true;
+    batch.cols[0].isNull[0] = true;
+    f.evaluate(batch);
+    Assert.assertEquals(0, batch.size); // all values are null so none qualify
+  }
+
+  /** Builds the batch used above and sets rows 0..3 of column 0 to 0, 1, 2, 3. */
+  private VectorizedRowBatch getSimpleLongBatch() {
+    // NOTE(review): arguments are presumably (rows, columns, seed) - confirm against the util.
+    VectorizedRowBatch simple = VectorizedRowGroupGenUtil.getVectorizedRowBatch(4, 1, 1);
+    LongColumnVector values = (LongColumnVector) simple.cols[0];
+
+    for (int row = 0; row < 4; row++) {
+      values.vector[row] = row;
+    }
+    return simple;
+  }
 }
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
new file mode 100644
index 0000000..d38cc5d
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorLogicalExpressions.java
@@ -0,0 +1,241 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.junit.Assert;
+import org.junit.Test;
+
+/** Tests for the vectorized logical expressions and the boolean/NULL column filters. */
+public class TestVectorLogicalExpressions {
+
+  @Test
+  public void testLongColOrLongCol() {
+    VectorizedRowBatch batch = getBatchThreeBooleanCols();
+    ColOrCol expr = new ColOrCol(0,1,2);
+    LongColumnVector outCol = (LongColumnVector) batch.cols[2];
+    expr.evaluate(batch);
+    // verify: rows 0-3 are plain OR; rows 4, 6 and 8 must be NULL; rows 5 and 7 must be 1
+    Assert.assertEquals(0, outCol.vector[0]);
+    Assert.assertEquals(1, outCol.vector[1]);
+    Assert.assertEquals(1, outCol.vector[2]);
+    Assert.assertEquals(1, outCol.vector[3]);
+    Assert.assertFalse(outCol.isNull[3]);
+    Assert.assertTrue(outCol.isNull[4]);
+    Assert.assertEquals(1, outCol.vector[5]);
+    Assert.assertTrue(outCol.isNull[6]);
+    Assert.assertEquals(1, outCol.vector[7]);
+    Assert.assertTrue(outCol.isNull[8]);
+
+    Assert.assertEquals(9, batch.size);
+    Assert.assertFalse(outCol.noNulls);
+    Assert.assertFalse(outCol.isRepeating);
+
+    // try non-null path
+    batch = getBatchThreeBooleanCols();
+    batch.cols[0].noNulls = true;
+    batch.cols[1].noNulls = true;
+    batch.cols[2].noNulls = false;
+    outCol = (LongColumnVector) batch.cols[2];
+    expr.evaluate(batch);
+
+    // spot check
+    Assert.assertTrue(outCol.noNulls);
+    Assert.assertEquals(0, outCol.vector[0]);
+    Assert.assertEquals(1, outCol.vector[1]);
+    Assert.assertEquals(1, outCol.vector[2]);
+    Assert.assertEquals(1, outCol.vector[3]);
+
+    // try isRepeating path (left input only), no nulls
+    batch = getBatchThreeBooleanCols();
+    batch.cols[0].noNulls = true; batch.cols[0].isRepeating = true;
+    batch.cols[1].noNulls = true; batch.cols[1].isRepeating = false;
+    batch.cols[2].noNulls = false; batch.cols[2].isRepeating = true;
+    outCol = (LongColumnVector) batch.cols[2];
+    expr.evaluate(batch);
+
+    // spot check
+    Assert.assertFalse(outCol.isRepeating);
+    Assert.assertEquals(0, outCol.vector[0]);
+    Assert.assertEquals(1, outCol.vector[1]);
+    Assert.assertEquals(0, outCol.vector[2]);
+    Assert.assertEquals(1, outCol.vector[3]);
+  }
+
+  /**
+   * Get a 9-row batch with three boolean (long) columns; 0 and 1 are inputs, 2 is the output.
+   */
+  private VectorizedRowBatch getBatchThreeBooleanCols() {
+    VectorizedRowBatch batch = new VectorizedRowBatch(3, VectorizedRowBatch.DEFAULT_SIZE);
+    LongColumnVector v0, v1, v2;
+    v0 = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
+    v1 = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
+    v2 = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
+    batch.cols[0] = v0;
+    batch.cols[1] = v1;
+    batch.cols[2] = v2;
+
+    // add some data and nulls; the trailing comment gives the logical (col 0, col 1) pair
+    int i;
+    i = 0; v0.vector[i] = 0; v0.isNull[i] = false; v1.vector[i] = 0; v1.isNull[i] = false; // 0 0
+    i = 1; v0.vector[i] = 0; v0.isNull[i] = false; v1.vector[i] = 1; v1.isNull[i] = false; // 0 1
+    i = 2; v0.vector[i] = 1; v0.isNull[i] = false; v1.vector[i] = 0; v1.isNull[i] = false; // 1 0
+    i = 3; v0.vector[i] = 1; v0.isNull[i] = false; v1.vector[i] = 1; v1.isNull[i] = false; // 1 1
+    i = 4; v0.vector[i] = 0; v0.isNull[i] = true; v1.vector[i] = 0; v1.isNull[i] = false; // NULL 0
+    i = 5; v0.vector[i] = 0; v0.isNull[i] = true; v1.vector[i] = 1; v1.isNull[i] = false; // NULL 1
+    i = 6; v0.vector[i] = 0; v0.isNull[i] = false; v1.vector[i] = 0; v1.isNull[i] = true; // 0 NULL
+    i = 7; v0.vector[i] = 1; v0.isNull[i] = false; v1.vector[i] = 1; v1.isNull[i] = true; // 1 NULL
+    i = 8; v0.vector[i] = 1; v0.isNull[i] = true; v1.vector[i] = 1; v1.isNull[i] = true; // NULL NULL
+
+    v0.noNulls = false;
+    v1.noNulls = false;
+    v0.isRepeating = false;
+    v1.isRepeating = false;
+
+    v2.isRepeating = true; // this value should get over-written with correct value
+    v2.noNulls = true; // ditto
+
+    batch.size = 9;
+    return batch;
+  }
+
+  @Test
+  public void testBooleanNot() {
+    VectorizedRowBatch batch = getBatchThreeBooleanCols();
+    NotCol expr = new NotCol(0,2);
+    LongColumnVector outCol = (LongColumnVector) batch.cols[2];
+    expr.evaluate(batch);
+
+    // Case with nulls
+    Assert.assertFalse(outCol.isRepeating);
+    Assert.assertEquals(1, outCol.vector[0]); Assert.assertFalse(outCol.isNull[0]);
+    Assert.assertEquals(0, outCol.vector[2]); Assert.assertFalse(outCol.isNull[2]);
+    Assert.assertTrue(outCol.isNull[4]);
+
+    // No nulls case
+    batch.cols[0].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertFalse(outCol.isRepeating);
+    Assert.assertTrue(outCol.noNulls);
+    Assert.assertEquals(1, outCol.vector[0]);
+    Assert.assertEquals(0, outCol.vector[2]);
+
+    // isRepeating, and there are nulls
+    batch = getBatchThreeBooleanCols();
+    outCol = (LongColumnVector) batch.cols[2];
+    batch.cols[0].isRepeating = true;
+    batch.cols[0].isNull[0] = true;
+    expr.evaluate(batch);
+    Assert.assertTrue(outCol.isRepeating);
+    Assert.assertTrue(outCol.isNull[0]);
+
+    // isRepeating, and no nulls
+    batch = getBatchThreeBooleanCols();
+    outCol = (LongColumnVector) batch.cols[2];
+    batch.cols[0].isRepeating = true;
+    batch.cols[0].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertTrue(outCol.isRepeating);
+    Assert.assertTrue(outCol.noNulls);
+    Assert.assertEquals(1, outCol.vector[0]);
+  }
+
+  @Test
+  public void testIsNullExpr () {
+    // has nulls, not repeating
+    VectorizedRowBatch batch = getBatchThreeBooleanCols();
+    IsNull expr = new IsNull(0,2);
+    LongColumnVector outCol = (LongColumnVector) batch.cols[2];
+    expr.evaluate(batch);
+    Assert.assertEquals(0, outCol.vector[0]);
+    Assert.assertEquals(1, outCol.vector[4]);
+    Assert.assertTrue(outCol.noNulls);
+    Assert.assertFalse(outCol.isRepeating);
+
+    // No nulls case, not repeating
+    batch.cols[0].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertFalse(outCol.isRepeating);
+    Assert.assertTrue(outCol.noNulls);
+    Assert.assertEquals(0, outCol.vector[0]);
+    Assert.assertEquals(0, outCol.vector[4]);
+
+    // isRepeating, and there are nulls
+    batch = getBatchThreeBooleanCols();
+    outCol = (LongColumnVector) batch.cols[2];
+    batch.cols[0].isRepeating = true;
+    batch.cols[0].isNull[0] = true;
+    expr.evaluate(batch);
+    Assert.assertTrue(outCol.isRepeating);
+    Assert.assertEquals(1, outCol.vector[0]);
+    Assert.assertTrue(outCol.noNulls);
+
+    // isRepeating, and no nulls
+    batch = getBatchThreeBooleanCols();
+    outCol = (LongColumnVector) batch.cols[2];
+    batch.cols[0].isRepeating = true;
+    batch.cols[0].noNulls = true;
+    expr.evaluate(batch);
+    Assert.assertTrue(outCol.isRepeating);
+    Assert.assertTrue(outCol.noNulls);
+    Assert.assertEquals(0, outCol.vector[0]);
+  }
+
+  @Test
+  public void testBooleanFiltersOnColumns() {
+    VectorizedRowBatch batch = getBatchThreeBooleanCols();
+    // column 0 is true (1 and not NULL) only in rows 2, 3 and 7
+    SelectColumnIsTrue expr = new SelectColumnIsTrue(0);
+    expr.evaluate(batch);
+    assertEquals(3, batch.size);
+    assertEquals(2, batch.selected[0]);
+    assertEquals(3, batch.selected[1]);
+    assertEquals(7, batch.selected[2]);
+
+    batch = getBatchThreeBooleanCols();
+    SelectColumnIsFalse expr1 = new SelectColumnIsFalse(1); // column 1 is false in rows 0, 2, 4
+    expr1.evaluate(batch);
+    assertEquals(3, batch.size);
+    assertEquals(0, batch.selected[0]);
+    assertEquals(2, batch.selected[1]);
+    assertEquals(4, batch.selected[2]);
+
+    batch = getBatchThreeBooleanCols();
+    SelectColumnIsNull expr2 = new SelectColumnIsNull(0); // column 0 is NULL in rows 4, 5, 8
+    expr2.evaluate(batch);
+    assertEquals(3, batch.size);
+    assertEquals(4, batch.selected[0]);
+    assertEquals(5, batch.selected[1]);
+    assertEquals(8, batch.selected[2]);
+
+    batch = getBatchThreeBooleanCols();
+    SelectColumnIsNotNull expr3 = new SelectColumnIsNotNull(1); // column 1 is NULL in rows 6, 7, 8
+    expr3.evaluate(batch);
+    assertEquals(6, batch.size);
+    assertEquals(0, batch.selected[0]);
+    assertEquals(1, batch.selected[1]);
+    assertEquals(2, batch.selected[2]);
+    assertEquals(3, batch.selected[3]);
+    assertEquals(4, batch.selected[4]);
+    assertEquals(5, batch.selected[5]);
+  }
+}