diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColEqualStringCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColEqualStringCol.java new file mode 100644 index 0000000..e21a5c4 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColEqualStringCol.java @@ -0,0 +1,455 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Filter the rows in a batch by comparing one string column to another. + * This code is generated from a template. + */ +public class FilterStringColEqualStringCol extends VectorExpression { + private int colNum1; + private int colNum2; + + public FilterStringColEqualStringCol(int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1]; + BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + byte[][] vector1 = inputColVector1.vector; + byte[][] vector2 = inputColVector2.vector; + int[] start1 = inputColVector1.start; + int[] start2 = inputColVector2.start; + int[] length1 = inputColVector1.length; + int[] length2 = inputColVector2.length; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // handle case where neither input has nulls + if (inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + + /* Either all must remain selected or all will be eliminated. + * Repeating property will not change. + */ + if (!(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) == 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) == 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) == 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + + // handle case where only input 2 has nulls + } else if (inputColVector1.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) == 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + + // no need to check for nulls in input 1 + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + + // no values will qualify because every comparison will be with NULL + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) == 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) == 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where only input 1 has nulls + } else if (inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) == 0)) { + batch.size = 0; + return; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + + // if repeating value is null then every comparison will fail so nothing qualifies + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) == 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) == 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where both inputs have nulls + } else { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) == 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) == 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) == 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) == 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColGreaterEqualStringCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColGreaterEqualStringCol.java new file mode 100644 index 0000000..a87120f --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColGreaterEqualStringCol.java @@ -0,0 +1,455 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Filter the rows in a batch by comparing one string column to another. + * This code is generated from a template. + */ +public class FilterStringColGreaterEqualStringCol extends VectorExpression { + private int colNum1; + private int colNum2; + + public FilterStringColGreaterEqualStringCol(int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1]; + BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + byte[][] vector1 = inputColVector1.vector; + byte[][] vector2 = inputColVector2.vector; + int[] start1 = inputColVector1.start; + int[] start2 = inputColVector2.start; + int[] length1 = inputColVector1.length; + int[] length2 = inputColVector2.length; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // handle case where neither input has nulls + if (inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + + /* Either all must remain selected or all will be eliminated. + * Repeating property will not change. + */ + if (!(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) >= 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) >= 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) >= 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + + // handle case where only input 2 has nulls + } else if (inputColVector1.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) >= 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + + // no need to check for nulls in input 1 + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + + // no values will qualify because every comparison will be with NULL + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) >= 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) >= 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where only input 1 has nulls + } else if (inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) >= 0)) { + batch.size = 0; + return; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + + // if repeating value is null then every comparison will fail so nothing qualifies + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) >= 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) >= 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where both inputs have nulls + } else { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) >= 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) >= 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) >= 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) >= 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColGreaterStringCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColGreaterStringCol.java new file mode 100644 index 0000000..378ea31 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColGreaterStringCol.java @@ -0,0 +1,455 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Filter the rows in a batch by comparing one string column to another. + * This code is generated from a template. + */ +public class FilterStringColGreaterStringCol extends VectorExpression { + private int colNum1; + private int colNum2; + + public FilterStringColGreaterStringCol(int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1]; + BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + byte[][] vector1 = inputColVector1.vector; + byte[][] vector2 = inputColVector2.vector; + int[] start1 = inputColVector1.start; + int[] start2 = inputColVector2.start; + int[] length1 = inputColVector1.length; + int[] length2 = inputColVector2.length; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // handle case where neither input has nulls + if (inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + + /* Either all must remain selected or all will be eliminated. + * Repeating property will not change. + */ + if (!(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) > 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) > 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) > 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + + // handle case where only input 2 has nulls + } else if (inputColVector1.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) > 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + + // no need to check for nulls in input 1 + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + + // no values will qualify because every comparison will be with NULL + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) > 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) > 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where only input 1 has nulls + } else if (inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) > 0)) { + batch.size = 0; + return; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + + // if repeating value is null then every comparison will fail so nothing qualifies + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) > 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) > 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where both inputs have nulls + } else { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) > 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) > 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) > 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) > 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColLessEqualStringCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColLessEqualStringCol.java new file mode 100644 index 0000000..7de8da9 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColLessEqualStringCol.java @@ -0,0 +1,455 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Filter the rows in a batch by comparing one string column to another. + * This code is generated from a template. + */ +public class FilterStringColLessEqualStringCol extends VectorExpression { + private int colNum1; + private int colNum2; + + public FilterStringColLessEqualStringCol(int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1]; + BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + byte[][] vector1 = inputColVector1.vector; + byte[][] vector2 = inputColVector2.vector; + int[] start1 = inputColVector1.start; + int[] start2 = inputColVector2.start; + int[] length1 = inputColVector1.length; + int[] length2 = inputColVector2.length; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // handle case where neither input has nulls + if (inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + + /* Either all must remain selected or all will be eliminated. + * Repeating property will not change. + */ + if (!(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) <= 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) <= 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) <= 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + + // handle case where only input 2 has nulls + } else if (inputColVector1.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) <= 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + + // no need to check for nulls in input 1 + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + + // no values will qualify because every comparison will be with NULL + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) <= 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) <= 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where only input 1 has nulls + } else if (inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) <= 0)) { + batch.size = 0; + return; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + + // if repeating value is null then every comparison will fail so nothing qualifies + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) <= 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) <= 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where both inputs have nulls + } else { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) <= 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) <= 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) <= 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) <= 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColLessStringCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColLessStringCol.java new file mode 100644 index 0000000..896e200 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColLessStringCol.java @@ -0,0 +1,455 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Filter the rows in a batch by comparing one string column to another. + * This code is generated from a template. + */ +public class FilterStringColLessStringCol extends VectorExpression { + private int colNum1; + private int colNum2; + + public FilterStringColLessStringCol(int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1]; + BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + byte[][] vector1 = inputColVector1.vector; + byte[][] vector2 = inputColVector2.vector; + int[] start1 = inputColVector1.start; + int[] start2 = inputColVector2.start; + int[] length1 = inputColVector1.length; + int[] length2 = inputColVector2.length; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // handle case where neither input has nulls + if (inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + + /* Either all must remain selected or all will be eliminated. + * Repeating property will not change. + */ + if (!(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) < 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) < 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) < 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + + // handle case where only input 2 has nulls + } else if (inputColVector1.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) < 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + + // no need to check for nulls in input 1 + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + + // no values will qualify because every comparison will be with NULL + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) < 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) < 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where only input 1 has nulls + } else if (inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) < 0)) { + batch.size = 0; + return; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + + // if repeating value is null then every comparison will fail so nothing qualifies + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) < 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) < 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where both inputs have nulls + } else { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) < 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) < 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) < 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) < 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColNotEqualStringCol.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColNotEqualStringCol.java new file mode 100644 index 0000000..338fcde --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/gen/FilterStringColNotEqualStringCol.java @@ -0,0 +1,455 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Filter the rows in a batch by comparing one string column to another. + * This code is generated from a template. + */ +public class FilterStringColNotEqualStringCol extends VectorExpression { + private int colNum1; + private int colNum2; + + public FilterStringColNotEqualStringCol(int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1]; + BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + byte[][] vector1 = inputColVector1.vector; + byte[][] vector2 = inputColVector2.vector; + int[] start1 = inputColVector1.start; + int[] start2 = inputColVector2.start; + int[] length1 = inputColVector1.length; + int[] length2 = inputColVector2.length; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // handle case where neither input has nulls + if (inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + + /* Either all must remain selected or all will be eliminated. + * Repeating property will not change. + */ + if (!(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) != 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) != 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) != 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + + // handle case where only input 2 has nulls + } else if (inputColVector1.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) != 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + + // no need to check for nulls in input 1 + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + + // no values will qualify because every comparison will be with NULL + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) != 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) != 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where only input 1 has nulls + } else if (inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) != 0)) { + batch.size = 0; + return; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + + // if repeating value is null then every comparison will fail so nothing qualifies + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) != 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) != 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where both inputs have nulls + } else { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) != 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) != 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) != 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) != 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java index 4bdd0c2..5744cf5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/CodeGen.java @@ -149,6 +149,13 @@ {"FilterStringColumnCompareScalar", "LessEqual", "<="}, {"FilterStringColumnCompareScalar", "Greater", ">"}, {"FilterStringColumnCompareScalar", "GreaterEqual", ">="}, + + {"FilterStringColumnCompareColumn", "Equal", "=="}, + {"FilterStringColumnCompareColumn", "NotEqual", "!="}, + {"FilterStringColumnCompareColumn", "Less", "<"}, + {"FilterStringColumnCompareColumn", "LessEqual", "<="}, + {"FilterStringColumnCompareColumn", "Greater", ">"}, + {"FilterStringColumnCompareColumn", "GreaterEqual", ">="}, {"FilterColumnCompareColumn", "Equal", "long", "double", "=="}, {"FilterColumnCompareColumn", "Equal", "double", "double", "=="}, @@ -262,6 +269,8 @@ private void generate() throws Exception { generateColumnUnaryMinus(tdesc); } else if (tdesc[0].equals("FilterStringColumnCompareScalar")) { generateFilterStringColumnCompareScalar(tdesc); + } else if (tdesc[0].equals("FilterStringColumnCompareColumn")) { + generateFilterStringColumnCompareColumn(tdesc); } else { continue; } @@ -371,6 +380,12 @@ private void generateFilterStringColumnCompareScalar(String[] tdesc) throws IOEx String className = "FilterStringCol" + operatorName + "StringScalar"; generateFilterStringColumnCompareScalar(tdesc,className); } + + private void generateFilterStringColumnCompareColumn(String[] tdesc) throws IOException { + String operatorName = tdesc[1]; + String className = "FilterStringCol" + operatorName + "StringCol"; + generateFilterStringColumnCompareScalar(tdesc,className); + } private void generateFilterStringColumnCompareScalar(String[] tdesc, String className) throws IOException { String operatorSymbol = tdesc[2]; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/FilterStringColumnCompareColumn.txt b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/FilterStringColumnCompareColumn.txt new file mode 100644 index 0000000..9309326 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/templates/FilterStringColumnCompareColumn.txt @@ -0,0 +1,455 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Filter the rows in a batch by comparing one string column to another. + * This code is generated from a template. + */ +public class extends VectorExpression { + private int colNum1; + private int colNum2; + + public (int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1]; + BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + byte[][] vector1 = inputColVector1.vector; + byte[][] vector2 = inputColVector2.vector; + int[] start1 = inputColVector1.start; + int[] start2 = inputColVector2.start; + int[] length1 = inputColVector1.length; + int[] length2 = inputColVector2.length; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // handle case where neither input has nulls + if (inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + + /* Either all must remain selected or all will be eliminated. + * Repeating property will not change. + */ + if (!(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + + // handle case where only input 2 has nulls + } else if (inputColVector1.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + + // no need to check for nulls in input 1 + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + + // no values will qualify because every comparison will be with NULL + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where only input 1 has nulls + } else if (inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) 0)) { + batch.size = 0; + return; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + + // if repeating value is null then every comparison will fail so nothing qualifies + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where both inputs have nulls + } else { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } +} diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java index 6371d66..6e26412 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColEqualStringScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColLessStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColLessStringCol; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen. FilterStringColGreaterEqualStringScalar; import org.junit.Test; @@ -45,6 +46,7 @@ private static byte[] greenred; private static byte[] redgreen; private static byte[] greengreen; + private static byte[] blue; private static byte[] emptyString; private static byte[] mixedUp; private static byte[] mixedUpLower; @@ -54,6 +56,7 @@ static { try { + blue = "blue".getBytes("UTF-8"); red = "red".getBytes("UTF-8"); redred = "redred".getBytes("UTF-8"); green = "green".getBytes("UTF-8"); @@ -164,6 +167,157 @@ public void testStringColCompareStringScalarFilter() { Assert.assertTrue(batch.selected[0] == 0); Assert.assertTrue(batch.selected[1] == 1); } + + @Test + public void testStringColCompareStringColFilter() { + VectorizedRowBatch batch; + VectorExpression expr; + + /* input data + * + * col0 col1 + * =============== + * blue red + * green green + * red blue + * NULL red col0 data is empty string if we un-set NULL property + */ + + // nulls possible on left, right + batch = makeStringBatchForColColCompare(); + expr = new FilterStringColLessStringCol(0,1); + expr.evaluate(batch); + Assert.assertEquals(1, batch.size); + Assert.assertEquals(0, batch.selected[0]); + + // no nulls possible + batch = makeStringBatchForColColCompare(); + batch.cols[0].noNulls = true; + batch.cols[1].noNulls = true; + expr.evaluate(batch); + Assert.assertEquals(2, batch.size); + Assert.assertEquals(3, batch.selected[1]); + + // nulls on left, no nulls on right + batch = makeStringBatchForColColCompare(); + batch.cols[1].noNulls = true; + expr.evaluate(batch); + Assert.assertEquals(1, batch.size); + Assert.assertEquals(0, batch.selected[0]); + + // nulls on right, no nulls on left + batch = makeStringBatchForColColCompare(); + batch.cols[0].noNulls = true; + batch.cols[1].isNull[3] = true; + expr.evaluate(batch); + Assert.assertEquals(1, batch.size); + Assert.assertEquals(0, batch.selected[0]); + + // Now vary isRepeating + // nulls possible on left, right + + // left repeats + batch = makeStringBatchForColColCompare(); + batch.cols[0].isRepeating = true; + expr.evaluate(batch); + Assert.assertEquals(3, batch.size); + Assert.assertEquals(3, batch.selected[2]); + + // right repeats + batch = makeStringBatchForColColCompare(); + batch.cols[1].isRepeating = true; + expr.evaluate(batch); + Assert.assertEquals(2, batch.size); // first 2 qualify + Assert.assertEquals(1, batch.selected[1]); + + // left and right repeat + batch = makeStringBatchForColColCompare(); + batch.cols[0].isRepeating = true; + batch.cols[1].isRepeating = true; + expr.evaluate(batch); + Assert.assertEquals(4, batch.size); + + // Now vary isRepeating + // nulls possible only on left + + // left repeats + batch = makeStringBatchForColColCompare(); + batch.cols[0].isRepeating = true; + batch.cols[1].noNulls = true; + expr.evaluate(batch); + Assert.assertEquals(3, batch.size); + Assert.assertEquals(3, batch.selected[2]); + + // left repeats and is null + batch = makeStringBatchForColColCompare(); + batch.cols[0].isRepeating = true; + batch.cols[1].noNulls = true; + batch.cols[0].isNull[0] = true; + expr.evaluate(batch); + Assert.assertEquals(0, batch.size); + + // right repeats + batch = makeStringBatchForColColCompare(); + batch.cols[1].isRepeating = true; + batch.cols[1].noNulls = true; + expr.evaluate(batch); + Assert.assertEquals(3, batch.size); + Assert.assertEquals(1, batch.selected[1]); + + // left and right repeat + batch = makeStringBatchForColColCompare(); + batch.cols[0].isRepeating = true; + batch.cols[1].isRepeating = true; + batch.cols[1].noNulls = true; + expr.evaluate(batch); + Assert.assertEquals(4, batch.size); + + + // Now vary isRepeating + // nulls possible only on right + + // left repeats + batch = makeStringBatchForColColCompare(); + batch.cols[0].isRepeating = true; + batch.cols[0].noNulls = true; + batch.cols[1].isNull[0] = true; + expr.evaluate(batch); + Assert.assertEquals(2, batch.size); + Assert.assertEquals(3, batch.selected[1]); + + // right repeats + batch = makeStringBatchForColColCompare(); + batch.cols[1].isRepeating = true; + batch.cols[0].noNulls = true; + expr.evaluate(batch); + Assert.assertEquals(3, batch.size); + Assert.assertEquals(3, batch.selected[2]); + + // right repeats and is null + batch = makeStringBatchForColColCompare(); + batch.cols[1].isRepeating = true; + batch.cols[0].noNulls = true; + batch.cols[1].isNull[0] = true; + expr.evaluate(batch); + Assert.assertEquals(0, batch.size); + + // left and right repeat + batch = makeStringBatchForColColCompare(); + batch.cols[0].isRepeating = true; + batch.cols[1].isRepeating = true; + batch.cols[0].noNulls = true; + expr.evaluate(batch); + Assert.assertEquals(4, batch.size); + + // left and right repeat and right is null + batch = makeStringBatchForColColCompare(); + batch.cols[0].isRepeating = true; + batch.cols[1].isRepeating = true; + batch.cols[0].noNulls = true; + batch.cols[1].isNull[0] = true; + expr.evaluate(batch); + Assert.assertEquals(0, batch.size); + } VectorizedRowBatch makeStringBatch() { // create a batch with one string ("Bytes") column @@ -383,6 +537,38 @@ private VectorizedRowBatch makeStringBatch2In1Out() { batch.size = 3; return batch; } + + private VectorizedRowBatch makeStringBatchForColColCompare() { + VectorizedRowBatch batch = new VectorizedRowBatch(3); + BytesColumnVector v = new BytesColumnVector(); + batch.cols[0] = v; + BytesColumnVector v2 = new BytesColumnVector(); + batch.cols[1] = v2; + batch.cols[2] = new BytesColumnVector(); + + v.setRef(0, blue, 0, blue.length); + v.isNull[0] = false; + v.setRef(1, green, 0, green.length); + v.isNull[1] = false; + v.setRef(2, red, 0, red.length); + v.isNull[2] = false; + v.setRef(3, emptyString, 0, emptyString.length); + v.isNull[3] = true; + v.noNulls = false; + + v2.setRef(0, red, 0, red.length); + v2.isNull[0] = false; + v2.setRef(1, green, 0, green.length); + v2.isNull[1] = false; + v2.setRef(2, blue, 0, blue.length); + v2.isNull[2] = false; + v2.setRef(3, red, 0, red.length); + v2.isNull[3] = false; + v2.noNulls = false; + + batch.size = 4; + return batch; + } @Test public void testStringLike() {