diff --git ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java index efccba0..6993f06 100644 --- ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java +++ ant/src/org/apache/hadoop/hive/ant/GenVectorCode.java @@ -232,29 +232,98 @@ {"FilterScalarCompareColumn", "GreaterEqual", "long", "long", ">="}, {"FilterScalarCompareColumn", "GreaterEqual", "double", "long", ">="}, - {"FilterStringColumnCompareScalar", "Equal", "=="}, - {"FilterStringColumnCompareScalar", "NotEqual", "!="}, - {"FilterStringColumnCompareScalar", "Less", "<"}, - {"FilterStringColumnCompareScalar", "LessEqual", "<="}, - {"FilterStringColumnCompareScalar", "Greater", ">"}, - {"FilterStringColumnCompareScalar", "GreaterEqual", ">="}, + {"FilterStringGroupColumnCompareStringGroupScalarBase", "Equal", "=="}, + {"FilterStringGroupColumnCompareStringGroupScalarBase", "NotEqual", "!="}, + {"FilterStringGroupColumnCompareStringGroupScalarBase", "Less", "<"}, + {"FilterStringGroupColumnCompareStringGroupScalarBase", "LessEqual", "<="}, + {"FilterStringGroupColumnCompareStringGroupScalarBase", "Greater", ">"}, + {"FilterStringGroupColumnCompareStringGroupScalarBase", "GreaterEqual", ">="}, + + {"FilterStringGroupColumnCompareStringScalar", "Equal", "=="}, + {"FilterStringGroupColumnCompareStringScalar", "NotEqual", "!="}, + {"FilterStringGroupColumnCompareStringScalar", "Less", "<"}, + {"FilterStringGroupColumnCompareStringScalar", "LessEqual", "<="}, + {"FilterStringGroupColumnCompareStringScalar", "Greater", ">"}, + {"FilterStringGroupColumnCompareStringScalar", "GreaterEqual", ">="}, + + {"FilterStringGroupColumnCompareTruncStringScalar", "VarChar", "Equal", "=="}, + {"FilterStringGroupColumnCompareTruncStringScalar", "VarChar", "NotEqual", "!="}, + {"FilterStringGroupColumnCompareTruncStringScalar", "VarChar", "Less", "<"}, + {"FilterStringGroupColumnCompareTruncStringScalar", "VarChar", "LessEqual", "<="}, + 
{"FilterStringGroupColumnCompareTruncStringScalar", "VarChar", "Greater", ">"}, + {"FilterStringGroupColumnCompareTruncStringScalar", "VarChar", "GreaterEqual", ">="}, + + {"FilterStringGroupColumnCompareTruncStringScalar", "Char", "Equal", "=="}, + {"FilterStringGroupColumnCompareTruncStringScalar", "Char", "NotEqual", "!="}, + {"FilterStringGroupColumnCompareTruncStringScalar", "Char", "Less", "<"}, + {"FilterStringGroupColumnCompareTruncStringScalar", "Char", "LessEqual", "<="}, + {"FilterStringGroupColumnCompareTruncStringScalar", "Char", "Greater", ">"}, + {"FilterStringGroupColumnCompareTruncStringScalar", "Char", "GreaterEqual", ">="}, {"FilterStringColumnBetween", ""}, {"FilterStringColumnBetween", "!"}, - {"StringColumnCompareScalar", "Equal", "=="}, - {"StringColumnCompareScalar", "NotEqual", "!="}, - {"StringColumnCompareScalar", "Less", "<"}, - {"StringColumnCompareScalar", "LessEqual", "<="}, - {"StringColumnCompareScalar", "Greater", ">"}, - {"StringColumnCompareScalar", "GreaterEqual", ">="}, - - {"FilterStringScalarCompareColumn", "Equal", "=="}, - {"FilterStringScalarCompareColumn", "NotEqual", "!="}, - {"FilterStringScalarCompareColumn", "Less", "<"}, - {"FilterStringScalarCompareColumn", "LessEqual", "<="}, - {"FilterStringScalarCompareColumn", "Greater", ">"}, - {"FilterStringScalarCompareColumn", "GreaterEqual", ">="}, + {"FilterTruncStringColumnBetween", "VarChar", ""}, + {"FilterTruncStringColumnBetween", "VarChar", "!"}, + + {"FilterTruncStringColumnBetween", "Char", ""}, + {"FilterTruncStringColumnBetween", "Char", "!"}, + + {"StringGroupColumnCompareStringGroupScalarBase", "Equal", "=="}, + {"StringGroupColumnCompareStringGroupScalarBase", "NotEqual", "!="}, + {"StringGroupColumnCompareStringGroupScalarBase", "Less", "<"}, + {"StringGroupColumnCompareStringGroupScalarBase", "LessEqual", "<="}, + {"StringGroupColumnCompareStringGroupScalarBase", "Greater", ">"}, + {"StringGroupColumnCompareStringGroupScalarBase", "GreaterEqual", ">="}, + + 
{"StringGroupColumnCompareStringScalar", "Equal", "=="}, + {"StringGroupColumnCompareStringScalar", "NotEqual", "!="}, + {"StringGroupColumnCompareStringScalar", "Less", "<"}, + {"StringGroupColumnCompareStringScalar", "LessEqual", "<="}, + {"StringGroupColumnCompareStringScalar", "Greater", ">"}, + {"StringGroupColumnCompareStringScalar", "GreaterEqual", ">="}, + + {"StringGroupColumnCompareTruncStringScalar", "VarChar", "Equal", "=="}, + {"StringGroupColumnCompareTruncStringScalar", "VarChar", "NotEqual", "!="}, + {"StringGroupColumnCompareTruncStringScalar", "VarChar", "Less", "<"}, + {"StringGroupColumnCompareTruncStringScalar", "VarChar", "LessEqual", "<="}, + {"StringGroupColumnCompareTruncStringScalar", "VarChar", "Greater", ">"}, + {"StringGroupColumnCompareTruncStringScalar", "VarChar", "GreaterEqual", ">="}, + + {"StringGroupColumnCompareTruncStringScalar", "Char", "Equal", "=="}, + {"StringGroupColumnCompareTruncStringScalar", "Char", "NotEqual", "!="}, + {"StringGroupColumnCompareTruncStringScalar", "Char", "Less", "<"}, + {"StringGroupColumnCompareTruncStringScalar", "Char", "LessEqual", "<="}, + {"StringGroupColumnCompareTruncStringScalar", "Char", "Greater", ">"}, + {"StringGroupColumnCompareTruncStringScalar", "Char", "GreaterEqual", ">="}, + + {"FilterStringGroupScalarCompareStringGroupColumnBase", "Equal", "=="}, + {"FilterStringGroupScalarCompareStringGroupColumnBase", "NotEqual", "!="}, + {"FilterStringGroupScalarCompareStringGroupColumnBase", "Less", "<"}, + {"FilterStringGroupScalarCompareStringGroupColumnBase", "LessEqual", "<="}, + {"FilterStringGroupScalarCompareStringGroupColumnBase", "Greater", ">"}, + {"FilterStringGroupScalarCompareStringGroupColumnBase", "GreaterEqual", ">="}, + + {"FilterStringScalarCompareStringGroupColumn", "Equal", "=="}, + {"FilterStringScalarCompareStringGroupColumn", "NotEqual", "!="}, + {"FilterStringScalarCompareStringGroupColumn", "Less", "<"}, + {"FilterStringScalarCompareStringGroupColumn", "LessEqual", 
"<="}, + {"FilterStringScalarCompareStringGroupColumn", "Greater", ">"}, + {"FilterStringScalarCompareStringGroupColumn", "GreaterEqual", ">="}, + + {"FilterTruncStringScalarCompareStringGroupColumn", "VarChar", "Equal", "=="}, + {"FilterTruncStringScalarCompareStringGroupColumn", "VarChar", "NotEqual", "!="}, + {"FilterTruncStringScalarCompareStringGroupColumn", "VarChar", "Less", "<"}, + {"FilterTruncStringScalarCompareStringGroupColumn", "VarChar", "LessEqual", "<="}, + {"FilterTruncStringScalarCompareStringGroupColumn", "VarChar", "Greater", ">"}, + {"FilterTruncStringScalarCompareStringGroupColumn", "VarChar", "GreaterEqual", ">="}, + + {"FilterTruncStringScalarCompareStringGroupColumn", "Char", "Equal", "=="}, + {"FilterTruncStringScalarCompareStringGroupColumn", "Char", "NotEqual", "!="}, + {"FilterTruncStringScalarCompareStringGroupColumn", "Char", "Less", "<"}, + {"FilterTruncStringScalarCompareStringGroupColumn", "Char", "LessEqual", "<="}, + {"FilterTruncStringScalarCompareStringGroupColumn", "Char", "Greater", ">"}, + {"FilterTruncStringScalarCompareStringGroupColumn", "Char", "GreaterEqual", ">="}, {"FilterDecimalColumnCompareScalar", "Equal", "=="}, {"FilterDecimalColumnCompareScalar", "NotEqual", "!="}, @@ -277,26 +346,47 @@ {"FilterDecimalColumnCompareColumn", "Greater", ">"}, {"FilterDecimalColumnCompareColumn", "GreaterEqual", ">="}, - {"StringScalarCompareColumn", "Equal", "=="}, - {"StringScalarCompareColumn", "NotEqual", "!="}, - {"StringScalarCompareColumn", "Less", "<"}, - {"StringScalarCompareColumn", "LessEqual", "<="}, - {"StringScalarCompareColumn", "Greater", ">"}, - {"StringScalarCompareColumn", "GreaterEqual", ">="}, - - {"FilterStringColumnCompareColumn", "Equal", "=="}, - {"FilterStringColumnCompareColumn", "NotEqual", "!="}, - {"FilterStringColumnCompareColumn", "Less", "<"}, - {"FilterStringColumnCompareColumn", "LessEqual", "<="}, - {"FilterStringColumnCompareColumn", "Greater", ">"}, - {"FilterStringColumnCompareColumn", 
"GreaterEqual", ">="}, - - {"StringColumnCompareColumn", "Equal", "=="}, - {"StringColumnCompareColumn", "NotEqual", "!="}, - {"StringColumnCompareColumn", "Less", "<"}, - {"StringColumnCompareColumn", "LessEqual", "<="}, - {"StringColumnCompareColumn", "Greater", ">"}, - {"StringColumnCompareColumn", "GreaterEqual", ">="}, + {"StringGroupScalarCompareStringGroupColumnBase", "Equal", "=="}, + {"StringGroupScalarCompareStringGroupColumnBase", "NotEqual", "!="}, + {"StringGroupScalarCompareStringGroupColumnBase", "Less", "<"}, + {"StringGroupScalarCompareStringGroupColumnBase", "LessEqual", "<="}, + {"StringGroupScalarCompareStringGroupColumnBase", "Greater", ">"}, + {"StringGroupScalarCompareStringGroupColumnBase", "GreaterEqual", ">="}, + + {"StringScalarCompareStringGroupColumn", "Equal", "=="}, + {"StringScalarCompareStringGroupColumn", "NotEqual", "!="}, + {"StringScalarCompareStringGroupColumn", "Less", "<"}, + {"StringScalarCompareStringGroupColumn", "LessEqual", "<="}, + {"StringScalarCompareStringGroupColumn", "Greater", ">"}, + {"StringScalarCompareStringGroupColumn", "GreaterEqual", ">="}, + + {"TruncStringScalarCompareStringGroupColumn", "VarChar", "Equal", "=="}, + {"TruncStringScalarCompareStringGroupColumn", "VarChar", "NotEqual", "!="}, + {"TruncStringScalarCompareStringGroupColumn", "VarChar", "Less", "<"}, + {"TruncStringScalarCompareStringGroupColumn", "VarChar", "LessEqual", "<="}, + {"TruncStringScalarCompareStringGroupColumn", "VarChar", "Greater", ">"}, + {"TruncStringScalarCompareStringGroupColumn", "VarChar", "GreaterEqual", ">="}, + + {"TruncStringScalarCompareStringGroupColumn", "Char", "Equal", "=="}, + {"TruncStringScalarCompareStringGroupColumn", "Char", "NotEqual", "!="}, + {"TruncStringScalarCompareStringGroupColumn", "Char", "Less", "<"}, + {"TruncStringScalarCompareStringGroupColumn", "Char", "LessEqual", "<="}, + {"TruncStringScalarCompareStringGroupColumn", "Char", "Greater", ">"}, + {"TruncStringScalarCompareStringGroupColumn", 
"Char", "GreaterEqual", ">="}, + + {"FilterStringGroupColumnCompareStringGroupColumn", "Equal", "=="}, + {"FilterStringGroupColumnCompareStringGroupColumn", "NotEqual", "!="}, + {"FilterStringGroupColumnCompareStringGroupColumn", "Less", "<"}, + {"FilterStringGroupColumnCompareStringGroupColumn", "LessEqual", "<="}, + {"FilterStringGroupColumnCompareStringGroupColumn", "Greater", ">"}, + {"FilterStringGroupColumnCompareStringGroupColumn", "GreaterEqual", ">="}, + + {"StringGroupColumnCompareStringGroupColumn", "Equal", "=="}, + {"StringGroupColumnCompareStringGroupColumn", "NotEqual", "!="}, + {"StringGroupColumnCompareStringGroupColumn", "Less", "<"}, + {"StringGroupColumnCompareStringGroupColumn", "LessEqual", "<="}, + {"StringGroupColumnCompareStringGroupColumn", "Greater", ">"}, + {"StringGroupColumnCompareStringGroupColumn", "GreaterEqual", ">="}, {"FilterColumnCompareColumn", "Equal", "long", "double", "=="}, {"FilterColumnCompareColumn", "Equal", "double", "double", "=="}, @@ -658,22 +748,40 @@ private void generate() throws Exception { generateVectorUDAFVar(tdesc); } else if (tdesc[0].equals("VectorUDAFVarDecimal")) { generateVectorUDAFVarDecimal(tdesc); - } else if (tdesc[0].equals("FilterStringColumnCompareScalar")) { - generateFilterStringColumnCompareScalar(tdesc); + } else if (tdesc[0].equals("FilterStringGroupColumnCompareStringGroupScalarBase")) { + generateFilterStringGroupColumnCompareStringGroupScalarBase(tdesc); + } else if (tdesc[0].equals("FilterStringGroupColumnCompareStringScalar")) { + generateFilterStringGroupColumnCompareStringScalar(tdesc); + } else if (tdesc[0].equals("FilterStringGroupColumnCompareTruncStringScalar")) { + generateFilterStringGroupColumnCompareTruncStringScalar(tdesc); } else if (tdesc[0].equals("FilterStringColumnBetween")) { generateFilterStringColumnBetween(tdesc); + } else if (tdesc[0].equals("FilterTruncStringColumnBetween")) { + generateFilterTruncStringColumnBetween(tdesc); } else if 
(tdesc[0].equals("FilterDecimalColumnBetween")) { generateFilterDecimalColumnBetween(tdesc); - } else if (tdesc[0].equals("StringColumnCompareScalar")) { - generateStringColumnCompareScalar(tdesc); - } else if (tdesc[0].equals("FilterStringScalarCompareColumn")) { - generateFilterStringScalarCompareColumn(tdesc); - } else if (tdesc[0].equals("StringScalarCompareColumn")) { - generateStringScalarCompareColumn(tdesc); - } else if (tdesc[0].equals("FilterStringColumnCompareColumn")) { - generateFilterStringColumnCompareColumn(tdesc); - } else if (tdesc[0].equals("StringColumnCompareColumn")) { - generateStringColumnCompareColumn(tdesc); + } else if (tdesc[0].equals("StringGroupColumnCompareStringGroupScalarBase")) { + generateStringGroupColumnCompareStringGroupScalarBase(tdesc); + } else if (tdesc[0].equals("StringGroupColumnCompareStringScalar")) { + generateStringGroupColumnCompareStringScalar(tdesc); + } else if (tdesc[0].equals("StringGroupColumnCompareTruncStringScalar")) { + generateStringGroupColumnCompareTruncStringScalar(tdesc); + } else if (tdesc[0].equals("FilterStringGroupScalarCompareStringGroupColumnBase")) { + generateFilterStringGroupScalarCompareStringGroupColumnBase(tdesc); + } else if (tdesc[0].equals("FilterStringScalarCompareStringGroupColumn")) { + generateFilterStringScalarCompareStringGroupColumn(tdesc); + } else if (tdesc[0].equals("FilterTruncStringScalarCompareStringGroupColumn")) { + generateFilterTruncStringScalarCompareStringGroupColumn(tdesc); + } else if (tdesc[0].equals("StringGroupScalarCompareStringGroupColumnBase")) { + generateStringGroupScalarCompareStringGroupColumnBase(tdesc); + } else if (tdesc[0].equals("StringScalarCompareStringGroupColumn")) { + generateStringScalarCompareStringGroupColumn(tdesc); + } else if (tdesc[0].equals("TruncStringScalarCompareStringGroupColumn")) { + generateTruncStringScalarCompareStringGroupColumn(tdesc); + } else if (tdesc[0].equals("FilterStringGroupColumnCompareStringGroupColumn")) { + 
generateFilterStringGroupColumnCompareStringGroupColumn(tdesc); + } else if (tdesc[0].equals("StringGroupColumnCompareStringGroupColumn")) { + generateStringGroupColumnCompareStringGroupColumn(tdesc); } else if (tdesc[0].equals("IfExprColumnColumn")) { generateIfExprColumnColumn(tdesc); } else if (tdesc[0].equals("IfExprColumnScalar")) { @@ -710,6 +818,35 @@ private void generateFilterStringColumnBetween(String[] tdesc) throws IOExceptio className, templateString); } + private void generateFilterTruncStringColumnBetween(String[] tdesc) throws IOException { + String truncStringTypeName = tdesc[1]; + String truncStringHiveType; + String truncStringHiveGetBytes; + if (truncStringTypeName == "Char") { + truncStringHiveType = "HiveChar"; + truncStringHiveGetBytes = "getStrippedValue().getBytes()"; + } else if (truncStringTypeName == "VarChar") { + truncStringHiveType = "HiveVarchar"; + truncStringHiveGetBytes = "getValue().getBytes()"; + } else { + throw new Error("Unsupported string type: " + truncStringTypeName); + } + String optionalNot = tdesc[2]; + String className = "Filter" + truncStringTypeName + "Column" + (optionalNot.equals("!") ? "Not" : "") + + "Between"; + // Read the template into a string, expand it, and write it. 
+ File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + templateString = templateString.replaceAll("<TruncStringTypeName>", truncStringTypeName); + templateString = templateString.replaceAll("<TruncStringHiveType>", truncStringHiveType); + templateString = templateString.replaceAll("<TruncStringHiveGetBytes>", truncStringHiveGetBytes); + templateString = templateString.replaceAll("<ClassName>", className); + templateString = templateString.replaceAll("<OptionalNot>", optionalNot); + + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + private void generateFilterDecimalColumnBetween(String[] tdesc) throws IOException { String optionalNot = tdesc[1]; String className = "FilterDecimalColumn" + (optionalNot.equals("!") ? "Not" : "") @@ -886,45 +1023,147 @@ private void generateVectorUDAFVarDecimal(String[] tdesc) throws Exception { writeFile(templateFile.lastModified(), udafOutputDirectory, udafClassesDirectory, className, templateString); } - - - private void generateFilterStringScalarCompareColumn(String[] tdesc) throws IOException { + + private void generateFilterStringGroupScalarCompareStringGroupColumnBase(String[] tdesc) throws IOException { String operatorName = tdesc[1]; - String className = "FilterStringScalar" + operatorName + "StringColumn"; + String className = "FilterStringGroupScalar" + operatorName + "StringGroupColumnBase"; // Template expansion logic is the same for both column-scalar and scalar-column cases. 
generateStringColumnCompareScalar(tdesc, className); } - private void generateStringScalarCompareColumn(String[] tdesc) throws IOException { + private void generateFilterStringScalarCompareStringGroupColumn(String[] tdesc) throws IOException { String operatorName = tdesc[1]; - String className = "StringScalar" + operatorName + "StringColumn"; + String className = "FilterStringScalar" + operatorName + "StringGroupColumn"; + String baseClassName = "FilterStringGroupScalar" + operatorName + "StringGroupColumnBase"; + String operatorSymbol = tdesc[2]; + // Read the template into a string; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + // Expand, and write result + templateString = templateString.replaceAll("<ClassName>", className); + templateString = templateString.replaceAll("<BaseClassName>", baseClassName); + templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + + private void generateFilterTruncStringScalarCompareStringGroupColumn(String[] tdesc) throws IOException { + String truncStringTypeName = tdesc[1]; + String operatorName = tdesc[2]; + String className = "Filter" + truncStringTypeName + "Scalar" + operatorName + "StringGroupColumn"; + String baseClassName = "FilterStringGroupScalar" + operatorName + "StringGroupColumnBase"; + generateStringCompareTruncStringScalar(tdesc, className, baseClassName); + } + + private void generateStringGroupScalarCompareStringGroupColumnBase(String[] tdesc) throws IOException { + String operatorName = tdesc[1]; + String className = "StringGroupScalar" + operatorName + "StringGroupColumnBase"; // Template expansion logic is the same for both column-scalar and scalar-column cases. 
generateStringColumnCompareScalar(tdesc, className); } - private void generateFilterStringColumnCompareScalar(String[] tdesc) throws IOException { + private void generateStringScalarCompareStringGroupColumn(String[] tdesc) throws IOException { String operatorName = tdesc[1]; - String className = "FilterStringCol" + operatorName + "StringScalar"; - generateStringColumnCompareScalar(tdesc, className); + String className = "StringScalar" + operatorName + "StringGroupColumn"; + String baseClassName = "StringGroupScalar" + operatorName + "StringGroupColumnBase"; + String operatorSymbol = tdesc[2]; + // Read the template into a string; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + // Expand, and write result + templateString = templateString.replaceAll("<ClassName>", className); + templateString = templateString.replaceAll("<BaseClassName>", baseClassName); + templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); } + + private void generateTruncStringScalarCompareStringGroupColumn(String[] tdesc) throws IOException { + String truncStringTypeName = tdesc[1]; + String operatorName = tdesc[2]; + String className = truncStringTypeName + "Scalar" + operatorName + "StringGroupColumn"; + String baseClassName = "StringGroupScalar" + operatorName + "StringGroupColumnBase"; + generateStringCompareTruncStringScalar(tdesc, className, baseClassName); } - private void generateStringColumnCompareScalar(String[] tdesc) throws IOException { + private void generateFilterStringGroupColumnCompareStringGroupScalarBase(String[] tdesc) throws IOException { String operatorName = tdesc[1]; - String className = "StringCol" + operatorName + "StringScalar"; + String className = "FilterStringGroupCol" + operatorName + "StringGroupScalarBase"; generateStringColumnCompareScalar(tdesc, 
className); } - private void generateFilterStringColumnCompareColumn(String[] tdesc) throws IOException { + private void generateFilterStringGroupColumnCompareStringScalar(String[] tdesc) throws IOException { String operatorName = tdesc[1]; - String className = "FilterStringCol" + operatorName + "StringColumn"; + String className = "FilterStringGroupCol" + operatorName + "StringScalar"; + String baseClassName = "FilterStringGroupCol" + operatorName + "StringGroupScalarBase"; + String operatorSymbol = tdesc[2]; + // Read the template into a string; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + // Expand, and write result + templateString = templateString.replaceAll("<ClassName>", className); + templateString = templateString.replaceAll("<BaseClassName>", baseClassName); + templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + + private void generateFilterStringGroupColumnCompareTruncStringScalar(String[] tdesc) throws IOException { + String truncStringTypeName = tdesc[1]; + String operatorName = tdesc[2]; + String className = "FilterStringGroupCol" + operatorName + truncStringTypeName + "Scalar"; + String baseClassName = "FilterStringGroupCol" + operatorName + "StringGroupScalarBase"; + generateStringCompareTruncStringScalar(tdesc, className, baseClassName); + } + + private void generateStringGroupColumnCompareStringGroupScalarBase(String[] tdesc) throws IOException { + String operatorName = tdesc[1]; + String className = "StringGroupCol" + operatorName + "StringGroupScalarBase"; + String operatorSymbol = tdesc[2]; + // Read the template into a string; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + // Expand, and write result + templateString = 
templateString.replaceAll("<ClassName>", className); + templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + + private void generateStringGroupColumnCompareStringScalar(String[] tdesc) throws IOException { + String operatorName = tdesc[1]; + String className = "StringGroupCol" + operatorName + "StringScalar"; + String baseClassName = "StringGroupCol" + operatorName + "StringGroupScalarBase"; + String operatorSymbol = tdesc[2]; + // Read the template into a string; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + // Expand, and write result + templateString = templateString.replaceAll("<ClassName>", className); + templateString = templateString.replaceAll("<BaseClassName>", baseClassName); + templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + + private void generateStringGroupColumnCompareTruncStringScalar(String[] tdesc) throws IOException { + String truncStringTypeName = tdesc[1]; + String operatorName = tdesc[2]; + String className = "StringGroupCol" + operatorName + truncStringTypeName + "Scalar"; + String baseClassName = "StringGroupCol" + operatorName + "StringGroupScalarBase"; + generateStringCompareTruncStringScalar(tdesc, className, baseClassName); + } + + private void generateFilterStringGroupColumnCompareStringGroupColumn(String[] tdesc) throws IOException { + String operatorName = tdesc[1]; + String className = "FilterStringGroupCol" + operatorName + "StringGroupColumn"; generateStringColumnCompareScalar(tdesc, className); } - private void generateStringColumnCompareColumn(String[] tdesc) throws IOException { + private void generateStringGroupColumnCompareStringGroupColumn(String[] tdesc) throws IOException { 
String operatorName = tdesc[1]; - String className = "StringCol" + operatorName + "StringColumn"; + String className = "StringGroupCol" + operatorName + "StringGroupColumn"; generateStringColumnCompareScalar(tdesc, className); } @@ -941,6 +1180,35 @@ private void generateStringColumnCompareScalar(String[] tdesc, String className) className, templateString); } + private void generateStringCompareTruncStringScalar(String[] tdesc, String className, String baseClassName) + throws IOException { + String truncStringTypeName = tdesc[1]; + String truncStringHiveType; + String truncStringHiveGetBytes; + if (truncStringTypeName == "Char") { + truncStringHiveType = "HiveChar"; + truncStringHiveGetBytes = "getStrippedValue().getBytes()"; + } else if (truncStringTypeName == "VarChar") { + truncStringHiveType = "HiveVarchar"; + truncStringHiveGetBytes = "getValue().getBytes()"; + } else { + throw new Error("Unsupported string type: " + truncStringTypeName); + } + String operatorSymbol = tdesc[3]; + // Read the template into a string; + File templateFile = new File(joinPath(this.expressionTemplateDirectory, tdesc[0] + ".txt")); + String templateString = readFile(templateFile); + // Expand, and write result + templateString = templateString.replaceAll("<ClassName>", className); + templateString = templateString.replaceAll("<BaseClassName>", baseClassName); + templateString = templateString.replaceAll("<OperatorSymbol>", operatorSymbol); + templateString = templateString.replaceAll("<TruncStringTypeName>", truncStringTypeName); + templateString = templateString.replaceAll("<TruncStringHiveType>", truncStringHiveType); + templateString = templateString.replaceAll("<TruncStringHiveGetBytes>", truncStringHiveGetBytes); + writeFile(templateFile.lastModified(), expressionOutputDirectory, expressionClassesDirectory, + className, templateString); + } + private void generateFilterColumnCompareColumn(String[] tdesc) throws Exception { //The variables are all same as ColumnCompareScalar except that //this template doesn't need a return type. Pass anything as return type. 
diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt index 8b6e4b7..e8049da 100644 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnBetween.txt @@ -184,9 +184,9 @@ public class extends VectorExpression { VectorExpressionDescriptor.Mode.FILTER) .setNumArguments(3) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string")) + VectorExpressionDescriptor.ArgumentType.STRING, + VectorExpressionDescriptor.ArgumentType.STRING, + VectorExpressionDescriptor.ArgumentType.STRING) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR, diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnCompareColumn.txt deleted file mode 100644 index 1d8140f..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnCompareColumn.txt +++ /dev/null @@ -1,492 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * Filter the rows in a batch by comparing one string column to another. - * This code is generated from a template. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum1; - private int colNum2; - - public (int colNum1, int colNum2) { - this.colNum1 = colNum1; - this.colNum2 = colNum2; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1]; - BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2]; - int[] sel = batch.selected; - boolean[] nullPos1 = inputColVector1.isNull; - boolean[] nullPos2 = inputColVector2.isNull; - int n = batch.size; - byte[][] vector1 = inputColVector1.vector; - byte[][] vector2 = inputColVector2.vector; - int[] start1 = inputColVector1.start; - int[] start2 = inputColVector2.start; - int[] length1 = inputColVector1.length; - int[] length2 = inputColVector2.length; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - // handle case where neither input has nulls - if (inputColVector1.noNulls && inputColVector2.noNulls) { - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - - /* Either all must remain selected or all will be eliminated. - * Repeating property will not change. 
- */ - if (!(StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[0], start2[0], length2[0]) 0)) { - batch.size = 0; - } - } else if (inputColVector1.isRepeating) { - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (inputColVector2.isRepeating) { - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - - // handle case where only input 2 has nulls - } else if (inputColVector1.noNulls) { - if (inputColVector1.isRepeating && 
inputColVector2.isRepeating) { - if (nullPos2[0] || - !(StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[0], start2[0], length2[0]) 0)) { - batch.size = 0; - } - } else if (inputColVector1.isRepeating) { - - // no need to check for nulls in input 1 - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos2[i]) { - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos2[i]) { - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (inputColVector2.isRepeating) { - if (nullPos2[0]) { - - // no values will qualify because every comparison will be with NULL - batch.size = 0; - return; - } - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { // neither input is repeating - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos2[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos2[i]) { - if 
(StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - - // handle case where only input 1 has nulls - } else if (inputColVector2.noNulls) { - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (nullPos1[0] || - !(StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[0], start2[0], length2[0]) 0)) { - batch.size = 0; - return; - } - } else if (inputColVector1.isRepeating) { - if (nullPos1[0]) { - - // if repeating value is null then every comparison will fail so nothing qualifies - batch.size = 0; - return; - } - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (inputColVector2.isRepeating) { - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos1[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos1[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { // neither input is repeating - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 
0; j != n; j++) { - int i = sel[j]; - if (!nullPos1[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos1[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - - // handle case where both inputs have nulls - } else { - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - if (nullPos1[0] || nullPos2[0] || - !(StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[0], start2[0], length2[0]) 0)) { - batch.size = 0; - } - } else if (inputColVector1.isRepeating) { - if (nullPos1[0]) { - batch.size = 0; - return; - } - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos2[i]) { - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos2[i]) { - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else if (inputColVector2.isRepeating) { - if (nullPos2[0]) { - batch.size = 0; - return; - } - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos1[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos1[i]) { - if 
(StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { // neither input is repeating - if (batch.selectedInUse) { - int newSize = 0; - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!nullPos1[i] && !nullPos2[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos1[i] && !nullPos2[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < batch.size) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - } - } - - @Override - public String getOutputType() { - return "boolean"; - } - - @Override - public int getOutputColumn() { - return -1; - } - - public int getColNum1() { - return colNum1; - } - - public void setColNum1(int colNum1) { - this.colNum1 = colNum1; - } - - public int getColNum2() { - return colNum2; - } - - public void setColNum2(int colNum2) { - this.colNum2 = colNum2; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.FILTER) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnCompareScalar.txt deleted file mode 100644 index 
cba51bc..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringColumnCompareScalar.txt +++ /dev/null @@ -1,176 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * This is a generated class to evaluate a comparison on a vector of strings. 
- */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private byte[] value; - - public (int colNum, byte[] value) { - this.colNum = colNum; - this.value = value; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - if (childExpressions != null) { - super.evaluateChildren(batch); - } - BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; - int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - byte[][] vector = inputColVector.vector; - int[] length = inputColVector.length; - int[] start = inputColVector.start; - - - // return immediately if batch is empty - if (n == 0) { - return; - } - - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!(StringExpr.compare(vector[0], start[0], length[0], value, 0, value.length) 0)) { - - //Entire batch is filtered out. - batch.size = 0; - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j=0; j != n; j++) { - int i = sel[j]; - if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { - sel[newSize++] = i; - } - } - if (newSize < n) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { - if (!(StringExpr.compare(vector[0], start[0], length[0], value, 0, value.length) 0)) { - - //Entire batch is filtered out. 
- batch.size = 0; - } - } else { - batch.size = 0; - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j=0; j != n; j++) { - int i = sel[j]; - if (!nullPos[i]) { - if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { - sel[newSize++] = i; - } - } - } - - //Change the selected vector - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos[i]) { - if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < n) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - } - - @Override - public int getOutputColumn() { - return -1; - } - - @Override - public String getOutputType() { - return "boolean"; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - public byte[] getValue() { - return value; - } - - public void setValue(byte[] value) { - this.value = value; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.FILTER) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt new file mode 100644 index 0000000..3bc4e33 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupColumn.txt @@ -0,0 +1,492 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Filter the rows in a batch by comparing one string column to another. + * This code is generated from a template. 
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + + public (int colNum1, int colNum2) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1]; + BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + int n = batch.size; + byte[][] vector1 = inputColVector1.vector; + byte[][] vector2 = inputColVector2.vector; + int[] start1 = inputColVector1.start; + int[] start2 = inputColVector2.start; + int[] length1 = inputColVector1.length; + int[] length2 = inputColVector2.length; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // handle case where neither input has nulls + if (inputColVector1.noNulls && inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + + /* Either all must remain selected or all will be eliminated. + * Repeating property will not change. 
+ */ + if (!(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + + // handle case where only input 2 has nulls + } else if (inputColVector1.noNulls) { + if (inputColVector1.isRepeating && 
inputColVector2.isRepeating) { + if (nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + + // no need to check for nulls in input 1 + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + + // no values will qualify because every comparison will be with NULL + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if 
(StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where only input 1 has nulls + } else if (inputColVector2.noNulls) { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) 0)) { + batch.size = 0; + return; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + + // if repeating value is null then every comparison will fail so nothing qualifies + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 
0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + + // handle case where both inputs have nulls + } else { + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + if (nullPos1[0] || nullPos2[0] || + !(StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) 0)) { + batch.size = 0; + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + batch.size = 0; + return; + } + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i]) { + if 
(StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos1[i] && !nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < batch.size) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return -1; + } + + public int getColNum1() { + return colNum1; + } + + public void setColNum1(int colNum1) { + this.colNum1 = colNum1; + } + + public int getColNum2() { + return colNum2; + } + + public void setColNum2(int colNum2) { + this.colNum2 = colNum2; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt 
ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt new file mode 100644 index 0000000..1c868ba --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringGroupScalarBase.txt @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * This is a generated class to evaluate a comparison on a vector of strings. 
+ */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + protected int colNum; + protected byte[] value; + + @Override + public void evaluate(VectorizedRowBatch batch) { + if (childExpressions != null) { + super.evaluateChildren(batch); + } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + byte[][] vector = inputColVector.vector; + int[] length = inputColVector.length; + int[] start = inputColVector.start; + + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!(StringExpr.compare(vector[0], start[0], length[0], value, 0, value.length) 0)) { + + //Entire batch is filtered out. + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!nullPos[0]) { + if (!(StringExpr.compare(vector[0], start[0], length[0], value, 0, value.length) 0)) { + + //Entire batch is filtered out. 
+ batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { + sel[newSize++] = i; + } + } + } + + //Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public byte[] getValue() { + return value; + } + + public void setValue(byte[] value) { + this.value = value; + } + +} \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringScalar.txt new file mode 100644 index 0000000..02d7819 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareStringScalar.txt @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * This is a generated class to evaluate a comparison on a vector of strings. + */ +public class extends { + + public (int colNum, byte[] value) { + this.colNum = colNum; + this.value = value; + } + + public () { + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareTruncStringScalar.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareTruncStringScalar.txt new file mode 100644 index 0000000..6caf58f --- /dev/null +++ 
ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupColumnCompareTruncStringScalar.txt @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; + +import org.apache.hadoop.hive.common.type.; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * This is a generated class to evaluate a comparison on a vector of strings. 
+ */ +public class extends { + + public (int colNum, value) { + this.colNum = colNum; + this.value = value.; + } + + public () { + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt new file mode 100644 index 0000000..91f5909 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringGroupScalarCompareStringGroupColumnBase.txt @@ -0,0 +1,158 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * This is a generated class to evaluate a comparison on a vector of strings. + * Do not edit the generated code directly. + */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + protected int colNum; + protected byte[] value; + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + if (childExpressions != null) { + super.evaluateChildren(batch); + } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + byte[][] vector = inputColVector.vector; + int[] length = inputColVector.length; + int[] start = inputColVector.start; + + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!(StringExpr.compare(value, 0, value.length, vector[0], start[0], length[0]) 0)) { + + //Entire batch is filtered out. 
+ batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!nullPos[0]) { + if (!(StringExpr.compare(value, 0, value.length, vector[0], start[0], length[0]) 0)) { + + //Entire batch is filtered out. + batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { + sel[newSize++] = i; + } + } + } + + //Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public byte[] getValue() { + return value; + } + + public void setValue(byte[] value) { + this.value = value; + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringScalarCompareColumn.txt 
ql/src/gen/vectorization/ExpressionTemplates/FilterStringScalarCompareColumn.txt deleted file mode 100644 index f41ec67..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/FilterStringScalarCompareColumn.txt +++ /dev/null @@ -1,177 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * This is a generated class to evaluate a comparison on a vector of strings. - * Do not edit the generated code directly. 
- */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private byte[] value; - - public (byte[] value, int colNum) { - this.colNum = colNum; - this.value = value; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - if (childExpressions != null) { - super.evaluateChildren(batch); - } - BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; - int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - int n = batch.size; - byte[][] vector = inputColVector.vector; - int[] length = inputColVector.length; - int[] start = inputColVector.start; - - - // return immediately if batch is empty - if (n == 0) { - return; - } - - if (inputColVector.noNulls) { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!(StringExpr.compare(value, 0, value.length, vector[0], start[0], length[0]) 0)) { - - //Entire batch is filtered out. - batch.size = 0; - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j=0; j != n; j++) { - int i = sel[j]; - if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { - sel[newSize++] = i; - } - } - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { - sel[newSize++] = i; - } - } - if (newSize < n) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } else { - if (inputColVector.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!nullPos[0]) { - if (!(StringExpr.compare(value, 0, value.length, vector[0], start[0], length[0]) 0)) { - - //Entire batch is filtered out. 
- batch.size = 0; - } - } else { - batch.size = 0; - } - } else if (batch.selectedInUse) { - int newSize = 0; - for(int j=0; j != n; j++) { - int i = sel[j]; - if (!nullPos[i]) { - if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { - sel[newSize++] = i; - } - } - } - - //Change the selected vector - batch.size = newSize; - } else { - int newSize = 0; - for(int i = 0; i != n; i++) { - if (!nullPos[i]) { - if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { - sel[newSize++] = i; - } - } - } - if (newSize < n) { - batch.size = newSize; - batch.selectedInUse = true; - } - } - } - } - - @Override - public int getOutputColumn() { - return -1; - } - - @Override - public String getOutputType() { - return "boolean"; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - public byte[] getValue() { - return value; - } - - public void setValue(byte[] value) { - this.value = value; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.FILTER) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.SCALAR, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterStringScalarCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterStringScalarCompareStringGroupColumn.txt new file mode 100644 index 0000000..66472d5 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterStringScalarCompareStringGroupColumn.txt @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license 
agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * This is a generated class to evaluate a comparison on a vector of strings. + * Do not edit the generated code directly. 
+ */ +public class extends { + + public (byte[] value, int colNum) { + this.colNum = colNum; + this.value = value; + } + + public () { + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt new file mode 100644 index 0000000..94a174d --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringColumnBetween.txt @@ -0,0 +1,198 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.common.type.; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + + +/** + * This is a generated class to evaluate a [NOT] BETWEEN comparison on a vector of strings. + */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum; + private byte[] left; + private byte[] right; + + public (int colNum, left, right) { + this.colNum = colNum; + this.left = left.; + this.right = right.; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + if (childExpressions != null) { + super.evaluateChildren(batch); + } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + int n = batch.size; + byte[][] vector = inputColVector.vector; + int[] length = inputColVector.length; + int[] start = inputColVector.start; + + + // return immediately if batch is empty + if (n == 0) { + return; + } + + if (inputColVector.noNulls) { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 + || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { + + //Entire batch is filtered out. 
+ batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j = 0; j != n; j++) { + int i = sel[j]; + if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 + && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { + sel[newSize++] = i; + } + } + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 + && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { + sel[newSize++] = i; + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } else { + if (inputColVector.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. + if (!nullPos[0]) { + if ((StringExpr.compare(vector[0], start[0], length[0], left, 0, left.length) < 0 + || StringExpr.compare(right, 0, right.length, vector[0], start[0], length[0]) < 0)) { + + //Entire batch is filtered out. 
+ batch.size = 0; + } + } else { + batch.size = 0; + } + } else if (batch.selectedInUse) { + int newSize = 0; + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!nullPos[i]) { + if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 + && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { + sel[newSize++] = i; + } + } + } + + //Change the selected vector + batch.size = newSize; + } else { + int newSize = 0; + for(int i = 0; i != n; i++) { + if (!nullPos[i]) { + if ((StringExpr.compare(left, 0, left.length, vector[i], start[i], length[i]) <= 0 + && StringExpr.compare(vector[i], start[i], length[i], right, 0, right.length) <= 0)) { + sel[newSize++] = i; + } + } + } + if (newSize < n) { + batch.size = newSize; + batch.selectedInUse = true; + } + } + } + } + + @Override + public int getOutputColumn() { + return -1; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public byte[] getLeft() { + return left; + } + + public void setLeft(byte[] value) { + this.left = value; + } + + public byte[] getRight() { + return right; + } + + public void setRight(byte[] value) { + this.right = value; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } + +} diff --git 
ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringScalarCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringScalarCompareStringGroupColumn.txt new file mode 100644 index 0000000..ab39596 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/FilterTruncStringScalarCompareStringGroupColumn.txt @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; + +import org.apache.hadoop.hive.common.type.; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * This is a generated class to evaluate a comparison on a vector of strings. + * Do not edit the generated code directly. 
+ */ +public class extends { + + public ( value, int colNum) { + this.colNum = colNum; + this.value = value.; + } + + public () { + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.FILTER) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/StringColumnCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/StringColumnCompareColumn.txt deleted file mode 100644 index 83e8934..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/StringColumnCompareColumn.txt +++ /dev/null @@ -1,508 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * Filter the rows in a batch by comparing one string column to another. - * This code is generated from a template. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum1; - private int colNum2; - private int outputColumn; - - public (int colNum1, int colNum2, int outputColumn) { - this.colNum1 = colNum1; - this.colNum2 = colNum2; - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1]; - BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2]; - LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] nullPos1 = inputColVector1.isNull; - boolean[] nullPos2 = inputColVector2.isNull; - boolean[] outNull = outputColVector.isNull; - - int n = batch.size; - byte[][] vector1 = inputColVector1.vector; - byte[][] vector2 = inputColVector2.vector; - int[] start1 = inputColVector1.start; - int[] start2 = inputColVector2.start; - int[] length1 = inputColVector1.length; - int[] length2 = inputColVector2.length; - - long[] outVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.noNulls = true; - outputColVector.isRepeating = false; - // handle case 
where neither input has nulls - if (inputColVector1.noNulls && inputColVector2.noNulls) { - outputColVector.noNulls = true; - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - outputColVector.isRepeating = true; - int ret = StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[0], start2[0], length2[0]); - if (ret 0) { - outVector[0] = 1; - } else { - outVector[0] = 0; - } - } else if (inputColVector1.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } else { - for(int i = 0; i != n; i++) { - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else if (inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } else { - for(int i = 0; i != n; i++) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } else { - for(int i = 0; i != n; i++) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - - // handle case where only input 2 has nulls - } else if (inputColVector1.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating 
&& inputColVector2.isRepeating) { - outputColVector.isRepeating = true; - outNull[0] = nullPos2[0]; - if (!nullPos2[0]) { - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[0], start2[0], length2[0]) 0) { - outVector[0] = 1; - } else { - outVector[0] = 0; - } - } - } else if (inputColVector1.isRepeating) { - - // no need to check for nulls in input 1 - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outNull[i] = nullPos2[i]; - if (!nullPos2[i]) { - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else { - for(int i = 0; i != n; i++) { - outNull[i] = nullPos2[i]; - if (!nullPos2[i]) { - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } - } else if (inputColVector2.isRepeating) { - if (nullPos2[0]) { - // Entire output vector will be null - outputColVector.isRepeating = true; - outNull[0] = true; - return; - } - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outNull[i] = false; - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } else { - for(int i = 0; i != n; i++) { - outNull[i] = false; - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else { // neither input is repeating - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outNull[i] = nullPos2[i]; - if (!nullPos2[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else { - for(int i = 0; i != n; i++) { - outNull[i] = 
nullPos2[i]; - if (!nullPos2[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } - } - - // handle case where only input 1 has nulls - } else if (inputColVector2.noNulls) { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - outputColVector.isRepeating = true; - outNull[0] = nullPos1[0]; - if (!nullPos1[0]) { - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[0], start2[0], length2[0]) 0) { - outVector[0] = 1; - } else { - outVector[0] = 0; - } - } - } else if (inputColVector1.isRepeating) { - if (nullPos1[0]) { - // Entire output vector will be null - outputColVector.isRepeating = true; - outNull[0] = true; - return; - } - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outNull[i] = false; - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } else { - for(int i = 0; i != n; i++) { - outNull[i] = false; - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else if (inputColVector2.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outNull[i] = nullPos1[i]; - if (!nullPos1[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else { - for(int i = 0; i != n; i++) { - outNull[i] = nullPos1[i]; - if (!nullPos1[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } - } else { // neither input is repeating - if (batch.selectedInUse) { - for(int j 
= 0; j != n; j++) { - int i = sel[j]; - outNull[i] = nullPos1[i]; - if (!nullPos1[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else { - for(int i = 0; i != n; i++) { - outNull[i] = nullPos1[i]; - if (!nullPos1[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } - } - - // handle case where both inputs have nulls - } else { - outputColVector.noNulls = false; - if (inputColVector1.isRepeating && inputColVector2.isRepeating) { - outputColVector.isRepeating = true; - outNull[0] = nullPos1[0] || nullPos2[0]; - if (!outNull[0]) { - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[0], start2[0], length2[0]) 0) { - outVector[0] = 1; - } else { - outVector[0] = 0; - } - } - } else if (inputColVector1.isRepeating) { - if (nullPos1[0]) { - outputColVector.isRepeating = true; - outNull[0] = true; - return; - } - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outNull[i] = nullPos2[i]; - if (!nullPos2[i]) { - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else { - for(int i = 0; i != n; i++) { - outNull[i] = nullPos2[i]; - if (!nullPos2[i]) { - if (StringExpr.compare(vector1[0], start1[0], length1[0], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } - } else if (inputColVector2.isRepeating) { - if (nullPos2[0]) { - outputColVector.isRepeating = true; - outNull[0] = true; - return; - } - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outNull[i] = nullPos1[i]; - if (!nullPos1[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], 
length2[0]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else { - for(int i = 0; i != n; i++) { - outNull[i] = nullPos1[i]; - if (!nullPos1[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[0], start2[0], length2[0]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } - } else { // neither input is repeating - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outNull[i] = nullPos1[i] || nullPos2[i]; - if (!outNull[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else { - for(int i = 0; i != n; i++) { - outNull[i] = nullPos1[i] || nullPos2[i]; - if (!outNull[i]) { - if (StringExpr.compare(vector1[i], start1[i], length1[i], - vector2[i], start2[i], length2[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } - } - } - } - - @Override - public String getOutputType() { - return "boolean"; - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - public int getColNum1() { - return colNum1; - } - - public void setColNum1(int colNum1) { - this.colNum1 = colNum1; - } - - public int getColNum2() { - return colNum2; - } - - public void setColNum2(int colNum2) { - this.colNum2 = colNum2; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git 
ql/src/gen/vectorization/ExpressionTemplates/StringColumnCompareScalar.txt ql/src/gen/vectorization/ExpressionTemplates/StringColumnCompareScalar.txt deleted file mode 100644 index a198cb6..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/StringColumnCompareScalar.txt +++ /dev/null @@ -1,180 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * This is a generated class to evaluate a comparison on a vector of strings. 
- */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private byte[] value; - private int outputColumn; - - public (int colNum, byte[] value, int outputColumn) { - this.colNum = colNum; - this.value = value; - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - if (childExpressions != null) { - super.evaluateChildren(batch); - } - BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; - LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNull = outputColVector.isNull; - int n = batch.size; - byte[][] vector = inputColVector.vector; - int[] length = inputColVector.length; - int[] start = inputColVector.start; - long[] outVector = outputColVector.vector; - - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = false; - if (inputColVector.noNulls) { - outputColVector.noNulls = true; - if (inputColVector.isRepeating) { - outputColVector.isRepeating = true; - if (StringExpr.compare(vector[0], start[0], length[0], value, 0, value.length) 0) { - outVector[0] = 1; - } else { - outVector[0] = 0; - } - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } else { - for(int i = 0; i != n; i++) { - if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else { - outputColVector.noNulls = false; - if (inputColVector.isRepeating) { - outputColVector.isRepeating = true; - outNull[0] = nullPos[0]; - if (!nullPos[0]) { - if (StringExpr.compare(vector[0], start[0], length[0], value, 0, 
value.length) 0) { - outVector[0] = 1; - } else { - outVector[0] = 0; - } - } - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outNull[i] = nullPos[i]; - if (!nullPos[i]) { - if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else { - for(int i = 0; i != n; i++) { - outNull[i] = nullPos[i]; - if (!nullPos[i]) { - if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } - } - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return "boolean"; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - public byte[] getValue() { - return value; - } - - public void setValue(byte[] value) { - this.value = value; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt new file mode 100644 index 0000000..749edc7 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupColumn.txt @@ -0,0 +1,508 @@ +/** + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Filter the rows in a batch by comparing one string column to another. + * This code is generated from a template. 
+ */ +public class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int colNum1; + private int colNum2; + private int outputColumn; + + public (int colNum1, int colNum2, int outputColumn) { + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inputColVector1 = (BytesColumnVector) batch.cols[colNum1]; + BytesColumnVector inputColVector2 = (BytesColumnVector) batch.cols[colNum2]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos1 = inputColVector1.isNull; + boolean[] nullPos2 = inputColVector2.isNull; + boolean[] outNull = outputColVector.isNull; + + int n = batch.size; + byte[][] vector1 = inputColVector1.vector; + byte[][] vector2 = inputColVector2.vector; + int[] start1 = inputColVector1.start; + int[] start2 = inputColVector2.start; + int[] length1 = inputColVector1.length; + int[] length2 = inputColVector2.length; + + long[] outVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.noNulls = true; + outputColVector.isRepeating = false; + // handle case where neither input has nulls + if (inputColVector1.noNulls && inputColVector2.noNulls) { + outputColVector.noNulls = true; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.isRepeating = true; + int ret = StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]); + if (ret 0) { + outVector[0] = 1; + } else { + outVector[0] = 0; + } + } else if (inputColVector1.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], 
length2[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } else { + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } else { + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } else { + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + + // handle case where only input 2 has nulls + } else if (inputColVector1.noNulls) { + outputColVector.noNulls = false; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.isRepeating = true; + outNull[0] = nullPos2[0]; + if (!nullPos2[0]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) 0) { + outVector[0] = 1; + } else { + outVector[0] = 0; + } + } + } else if (inputColVector1.isRepeating) { + + // no need to check for nulls in input 1 + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outNull[i] = nullPos2[i]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + 
outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else { + for(int i = 0; i != n; i++) { + outNull[i] = nullPos2[i]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + // Entire output vector will be null + outputColVector.isRepeating = true; + outNull[0] = true; + return; + } + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outNull[i] = false; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } else { + for(int i = 0; i != n; i++) { + outNull[i] = false; + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outNull[i] = nullPos2[i]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else { + for(int i = 0; i != n; i++) { + outNull[i] = nullPos2[i]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } + } + + // handle case where only input 1 has nulls + } else if (inputColVector2.noNulls) { + outputColVector.noNulls = false; + if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.isRepeating = true; + outNull[0] = nullPos1[0]; + if (!nullPos1[0]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) 0) { + outVector[0] = 1; + } 
else { + outVector[0] = 0; + } + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + // Entire output vector will be null + outputColVector.isRepeating = true; + outNull[0] = true; + return; + } + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outNull[i] = false; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } else { + for(int i = 0; i != n; i++) { + outNull[i] = false; + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else if (inputColVector2.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outNull[i] = nullPos1[i]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else { + for(int i = 0; i != n; i++) { + outNull[i] = nullPos1[i]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outNull[i] = nullPos1[i]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else { + for(int i = 0; i != n; i++) { + outNull[i] = nullPos1[i]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } + } + + // handle case where both inputs have nulls + } else { + outputColVector.noNulls = false; 
+ if (inputColVector1.isRepeating && inputColVector2.isRepeating) { + outputColVector.isRepeating = true; + outNull[0] = nullPos1[0] || nullPos2[0]; + if (!outNull[0]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[0], start2[0], length2[0]) 0) { + outVector[0] = 1; + } else { + outVector[0] = 0; + } + } + } else if (inputColVector1.isRepeating) { + if (nullPos1[0]) { + outputColVector.isRepeating = true; + outNull[0] = true; + return; + } + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outNull[i] = nullPos2[i]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else { + for(int i = 0; i != n; i++) { + outNull[i] = nullPos2[i]; + if (!nullPos2[i]) { + if (StringExpr.compare(vector1[0], start1[0], length1[0], + vector2[i], start2[i], length2[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } + } else if (inputColVector2.isRepeating) { + if (nullPos2[0]) { + outputColVector.isRepeating = true; + outNull[0] = true; + return; + } + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outNull[i] = nullPos1[i]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else { + for(int i = 0; i != n; i++) { + outNull[i] = nullPos1[i]; + if (!nullPos1[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[0], start2[0], length2[0]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } + } else { // neither input is repeating + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outNull[i] = nullPos1[i] || nullPos2[i]; + if (!outNull[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 
0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else { + for(int i = 0; i != n; i++) { + outNull[i] = nullPos1[i] || nullPos2[i]; + if (!outNull[i]) { + if (StringExpr.compare(vector1[i], start1[i], length1[i], + vector2[i], start2[i], length2[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } + } + } + } + + @Override + public String getOutputType() { + return "boolean"; + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + public int getColNum1() { + return colNum1; + } + + public void setColNum1(int colNum1) { + this.colNum1 = colNum1; + } + + public int getColNum2() { + return colNum2; + } + + public void setColNum2(int colNum2) { + this.colNum2 = colNum2; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt new file mode 100644 index 0000000..4d65d50 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringGroupScalarBase.txt @@ -0,0 +1,158 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * This is a generated class to evaluate a comparison on a vector of strings. 
+ */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + protected int colNum; + protected byte[] value; + protected int outputColumn; + + @Override + public void evaluate(VectorizedRowBatch batch) { + if (childExpressions != null) { + super.evaluateChildren(batch); + } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + boolean[] outNull = outputColVector.isNull; + int n = batch.size; + byte[][] vector = inputColVector.vector; + int[] length = inputColVector.length; + int[] start = inputColVector.start; + long[] outVector = outputColVector.vector; + + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = false; + if (inputColVector.noNulls) { + outputColVector.noNulls = true; + if (inputColVector.isRepeating) { + outputColVector.isRepeating = true; + if (StringExpr.compare(vector[0], start[0], length[0], value, 0, value.length) 0) { + outVector[0] = 1; + } else { + outVector[0] = 0; + } + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } else { + for(int i = 0; i != n; i++) { + if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else { + outputColVector.noNulls = false; + if (inputColVector.isRepeating) { + outputColVector.isRepeating = true; + outNull[0] = nullPos[0]; + if (!nullPos[0]) { + if (StringExpr.compare(vector[0], start[0], length[0], value, 0, value.length) 0) { + outVector[0] = 1; + } else { + outVector[0] = 0; + } + } + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i 
= sel[j]; + outNull[i] = nullPos[i]; + if (!nullPos[i]) { + if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else { + for(int i = 0; i != n; i++) { + outNull[i] = nullPos[i]; + if (!nullPos[i]) { + if (StringExpr.compare(vector[i], start[i], length[i], value, 0, value.length) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public byte[] getValue() { + return value; + } + + public void setValue(byte[] value) { + this.value = value; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + +} \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringScalar.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringScalar.txt new file mode 100644 index 0000000..1df7157 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareStringScalar.txt @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * This is a generated class to evaluate a comparison on a vector of strings. 
+ */ +public class extends { + + public (int colNum, byte[] value, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt new file mode 100644 index 0000000..36b3c11 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupColumnCompareTruncStringScalar.txt @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; + +import org.apache.hadoop.hive.common.type.; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * This is a generated class to evaluate a comparison on a vector of strings. + */ +public class extends { + + public (int colNum, value, int outputColumn) { + this.colNum = colNum; + this.value = value.; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, + VectorExpressionDescriptor.ArgumentType.getType("")) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt new file mode 100644 index 0000000..a734281 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/StringGroupScalarCompareStringGroupColumnBase.txt @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * This is a generated class to evaluate a comparison on a vector of strings. + * Do not edit the generated code directly. 
+ */ +public abstract class extends VectorExpression { + + private static final long serialVersionUID = 1L; + + protected int colNum; + protected byte[] value; + protected int outputColumn; + + @Override + public void evaluate(VectorizedRowBatch batch) { + if (childExpressions != null) { + super.evaluateChildren(batch); + } + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; + LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] nullPos = inputColVector.isNull; + boolean[] outNull = outputColVector.isNull; + int n = batch.size; + byte[][] vector = inputColVector.vector; + int[] length = inputColVector.length; + int[] start = inputColVector.start; + long[] outVector = outputColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.isRepeating = false; + if (inputColVector.noNulls) { + outputColVector.noNulls = true; + if (inputColVector.isRepeating) { + outputColVector.isRepeating = true; + if (StringExpr.compare(value, 0, value.length, vector[0], start[0], length[0]) 0) { + outVector[0] = 1; + } else { + outVector[0] = 0; + } + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } else { + for(int i = 0; i != n; i++) { + if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else { + outputColVector.noNulls = false; + if (inputColVector.isRepeating) { + outputColVector.isRepeating = true; + outNull[0] = nullPos[0]; + if (!nullPos[0]) { + if (StringExpr.compare(value, 0, value.length, vector[0], start[0], length[0]) 0) { + outVector[0] = 1; + } else { + outVector[0] = 0; + } + } + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = 
sel[j]; + outNull[i] = nullPos[i]; + if (!nullPos[i]) { + if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } else { + for(int i = 0; i != n; i++) { + outNull[i] = nullPos[i]; + if (!nullPos[i]) { + if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { + outVector[i] = 1; + } else { + outVector[i] = 0; + } + } + } + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "boolean"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public byte[] getValue() { + return value; + } + + public void setValue(byte[] value) { + this.value = value; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } +} \ No newline at end of file diff --git ql/src/gen/vectorization/ExpressionTemplates/StringScalarCompareColumn.txt ql/src/gen/vectorization/ExpressionTemplates/StringScalarCompareColumn.txt deleted file mode 100644 index c797018..0000000 --- ql/src/gen/vectorization/ExpressionTemplates/StringScalarCompareColumn.txt +++ /dev/null @@ -1,180 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * This is a generated class to evaluate a comparison on a vector of strings. - * Do not edit the generated code directly. - */ -public class extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int colNum; - private byte[] value; - private int outputColumn; - - public (byte[] value, int colNum, int outputColumn) { - this.colNum = colNum; - this.value = value; - this.outputColumn = outputColumn; - } - - public () { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - if (childExpressions != null) { - super.evaluateChildren(batch); - } - BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; - LongColumnVector outputColVector = (LongColumnVector) batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] nullPos = inputColVector.isNull; - boolean[] outNull = outputColVector.isNull; - int n = batch.size; - byte[][] vector = inputColVector.vector; - int[] length = inputColVector.length; - int[] start = inputColVector.start; - long[] outVector = outputColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.isRepeating = false; - if (inputColVector.noNulls) { - outputColVector.noNulls = true; - if (inputColVector.isRepeating) { - outputColVector.isRepeating = true; - if (StringExpr.compare(value, 0, value.length, vector[0], start[0], 
length[0]) 0) { - outVector[0] = 1; - } else { - outVector[0] = 0; - } - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } else { - for(int i = 0; i != n; i++) { - if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else { - outputColVector.noNulls = false; - if (inputColVector.isRepeating) { - outputColVector.isRepeating = true; - outNull[0] = nullPos[0]; - if (!nullPos[0]) { - if (StringExpr.compare(value, 0, value.length, vector[0], start[0], length[0]) 0) { - outVector[0] = 1; - } else { - outVector[0] = 0; - } - } - } else if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outNull[i] = nullPos[i]; - if (!nullPos[i]) { - if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } else { - for(int i = 0; i != n; i++) { - outNull[i] = nullPos[i]; - if (!nullPos[i]) { - if (StringExpr.compare(value, 0, value.length, vector[i], start[i], length[i]) 0) { - outVector[i] = 1; - } else { - outVector[i] = 0; - } - } - } - } - } - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return "boolean"; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - public byte[] getValue() { - return value; - } - - public void setValue(byte[] value) { - this.value = value; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - 
VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.SCALAR, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git ql/src/gen/vectorization/ExpressionTemplates/StringScalarCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/StringScalarCompareStringGroupColumn.txt new file mode 100644 index 0000000..fcf55e1 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/StringScalarCompareStringGroupColumn.txt @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * This is a generated class to evaluate a comparison on a vector of strings. + * Do not edit the generated code directly. + */ +public class extends { + + private static final long serialVersionUID = 1L; + + public (byte[] value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = value; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt new file mode 100644 index 0000000..43c6206 --- /dev/null +++ ql/src/gen/vectorization/ExpressionTemplates/TruncStringScalarCompareStringGroupColumn.txt @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions.gen; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.; + +import org.apache.hadoop.hive.common.type.; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * This is a generated class to evaluate a comparison on a vector of strings. + * Do not edit the generated code directly. 
+ */ +public class extends { + + private static final long serialVersionUID = 1L; + + public ( value, int colNum, int outputColumn) { + this.colNum = colNum; + this.value = value.; + this.outputColumn = outputColumn; + } + + public () { + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.getType(""), + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java index 2d67b5b..1ed0300 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java @@ -35,8 +35,11 @@ LONG(1), DOUBLE(2), STRING(3), - DECIMAL(4), - ANY(7); + CHAR(4), + VARCHAR(5), + STRING_GROUP(6), + DECIMAL(7), + ANY(8); private final int value; @@ -162,6 +165,28 @@ public Descriptor build() { */ public static final class Descriptor { + private boolean isStringGroup(ArgumentType type) { + if (type == ArgumentType.STRING || + type == ArgumentType.CHAR || + type == ArgumentType.VARCHAR || + type == ArgumentType.STRING_GROUP) { + return true; + } + return false; + } + private boolean isSameGroup(ArgumentType type1, ArgumentType type2) { + if (type1.equals(ArgumentType.ANY) || + type2.equals(ArgumentType.ANY)) { + return true; + } + if (type1 == ArgumentType.STRING_GROUP && isStringGroup(type2)) { + return true; + } + if (type2 == ArgumentType.STRING_GROUP && isStringGroup(type1)) { + return true; + } + return false; + } @Override public boolean 
equals(Object o) { Descriptor other = (Descriptor) o; @@ -169,8 +194,8 @@ public boolean equals(Object o) { return false; } for (int i = 0; i < argCount; i++) { - if (!argTypes[i].equals(other.argTypes[i]) && (!argTypes[i].equals(ArgumentType.ANY) && - !other.argTypes[i].equals(ArgumentType.ANY))) { + if (!argTypes[i].equals(other.argTypes[i]) && + !isSameGroup(argTypes[i], other.argTypes[i])) { return false; } if (!exprTypes[i].equals(other.exprTypes[i])) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index a78c396..2499241 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -97,11 +97,13 @@ import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.util.StringUtils; /** * Context class for vectorization execution. 
@@ -123,6 +125,15 @@ public static final Pattern decimalTypePattern = Pattern.compile("decimal.*", Pattern.CASE_INSENSITIVE); + public static final Pattern charTypePattern = Pattern.compile("char.*", + Pattern.CASE_INSENSITIVE); + + public static final Pattern varcharTypePattern = Pattern.compile("varchar.*", + Pattern.CASE_INSENSITIVE); + + public static final Pattern charVarcharTypePattern = Pattern.compile("char.*|varchar.*", + Pattern.CASE_INSENSITIVE); + //Map column number to type private final OutputColumnManager ocm; @@ -325,7 +336,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc exprDesc, Mode mode) th childExpressions, mode, exprDesc.getTypeInfo()); } } else if (exprDesc instanceof ExprNodeNullDesc) { - ve = getConstantVectorExpression(null, exprDesc.getTypeInfo(), mode); + ve = getConstantVectorExpression(null, exprDesc.getTypeInfo(), mode); } else if (exprDesc instanceof ExprNodeConstantDesc) { ve = getConstantVectorExpression(((ExprNodeConstantDesc) exprDesc).getValue(), exprDesc.getTypeInfo(), mode); @@ -548,6 +559,12 @@ private GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveException case STRING: udfClass = new UDFToString(); break; + case CHAR: + genericUdf = new GenericUDFToChar(); + break; + case VARCHAR: + genericUdf = new GenericUDFToVarchar(); + break; case BOOLEAN: udfClass = new UDFToBoolean(); break; @@ -572,7 +589,7 @@ private GenericUDF getGenericUDFForCast(TypeInfo castType) throws HiveException ((GenericUDFBridge) genericUdf).setUdfClassName(udfClass.getClass().getName()); } if (genericUdf instanceof SettableUDF) { - ((SettableUDF)genericUdf).setTypeInfo(castType); + ((SettableUDF)genericUdf).setTypeInfo(castType); } return genericUdf; } @@ -592,15 +609,15 @@ public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr) { Class udfClass = bridge.getUdfClass(); if (udfClass.equals(UDFHex.class) || udfClass.equals(UDFConv.class) - || isCastToIntFamily(udfClass) && arg0Type(expr).equals("string") - 
|| isCastToFloatFamily(udfClass) && arg0Type(expr).equals("string") + || isCastToIntFamily(udfClass) && isStringFamily(arg0Type(expr)) + || isCastToFloatFamily(udfClass) && isStringFamily(arg0Type(expr)) || udfClass.equals(UDFToString.class) && (arg0Type(expr).equals("timestamp") || arg0Type(expr).equals("double") || arg0Type(expr).equals("float"))) { return true; } - } else if ((gudf instanceof GenericUDFTimestamp && arg0Type(expr).equals("string")) + } else if ((gudf instanceof GenericUDFTimestamp && isStringFamily(arg0Type(expr))) /* GenericUDFCase and GenericUDFWhen are implemented with the UDF Adaptor because * of their complexity and generality. In the future, variations of these @@ -615,6 +632,16 @@ public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr) { || gudf instanceof GenericUDFCase || gudf instanceof GenericUDFWhen) { return true; + } else if (gudf instanceof GenericUDFToChar && + (arg0Type(expr).equals("timestamp") + || arg0Type(expr).equals("double") + || arg0Type(expr).equals("float"))) { + return true; + } else if (gudf instanceof GenericUDFToVarchar && + (arg0Type(expr).equals("timestamp") + || arg0Type(expr).equals("double") + || arg0Type(expr).equals("float"))) { + return true; } return false; } @@ -723,12 +750,15 @@ private VectorExpression getConstantVectorExpression(Object constantValue, TypeI Mode mode) throws HiveException { String type = typeInfo.getTypeName(); String colVectorType = getNormalizedTypeName(type); + if (colVectorType == null) { + throw new HiveException("No vector type for type name " + type); + } int outCol = -1; if (mode == Mode.PROJECTION) { outCol = ocm.allocateOutputColumn(colVectorType); } if (constantValue == null) { - return new ConstantVectorExpression(outCol, type, true); + return new ConstantVectorExpression(outCol, type, true); } else if (decimalTypePattern.matcher(type).matches()) { VectorExpression ve = new ConstantVectorExpression(outCol, (Decimal128) constantValue); 
ve.setOutputType(typeInfo.getTypeName()); @@ -739,7 +769,7 @@ private VectorExpression getConstantVectorExpression(Object constantValue, TypeI ((Number) constantValue).longValue()); } else if (type.equalsIgnoreCase("double") || type.equalsIgnoreCase("float")) { return new ConstantVectorExpression(outCol, ((Number) constantValue).doubleValue()); - } else if (type.equalsIgnoreCase("string")) { + } else if (isStringFamily(type)) { return new ConstantVectorExpression(outCol, ((String) constantValue).getBytes()); } else if (type.equalsIgnoreCase("boolean")) { if (mode == Mode.FILTER) { @@ -799,7 +829,15 @@ private VectorExpression getVectorExpressionForUdf(Class udf, List vectorClass, try { for (int i = 0; i < numChildren; i++) { ExprNodeDesc child = childExpr.get(i); - inputTypes[i] = VectorExpression.Type.getValue(child.getTypeInfo().getTypeName()); + String typeName = getUndecoratedTypeName(child.getTypeInfo().getTypeName()); + inputTypes[i] = VectorExpression.Type.getValue(typeName); if (child instanceof ExprNodeGenericFuncDesc) { VectorExpression vChild = getVectorExpression(child, childrenMode); children.add(vChild); @@ -870,36 +909,71 @@ private Mode getChildrenMode(Mode mode, Class udf) { return Mode.PROJECTION; } + private String getNewInstanceArgumentString(Object [] args) { + if (args == null) { + return "arguments: NULL"; + } + ArrayList argClasses = new ArrayList(); + for (Object obj : args) { + argClasses.add(obj.getClass().getSimpleName()); + } + return "arguments: " + Arrays.toString(args) + ", argument classes: " + argClasses.toString(); + } + private VectorExpression instantiateExpression(Class vclass, TypeInfo returnType, Object...args) throws HiveException { VectorExpression ve = null; Constructor ctor = getConstructor(vclass); int numParams = ctor.getParameterTypes().length; int argsLength = (args == null) ? 
0 : args.length; - try { - if (numParams == 0) { + if (numParams == 0) { + try { ve = (VectorExpression) ctor.newInstance(); - } else if (numParams == argsLength) { + } catch (Exception ex) { + throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with 0 arguments, exception: " + + StringUtils.stringifyException(ex)); + } + } else if (numParams == argsLength) { + try { ve = (VectorExpression) ctor.newInstance(args); - } else if (numParams == argsLength + 1) { - // Additional argument is needed, which is the outputcolumn. + } catch (Exception ex) { + throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with " + getNewInstanceArgumentString(args) + ", exception: " + + StringUtils.stringifyException(ex)); + } + } else if (numParams == argsLength + 1) { + // Additional argument is needed, which is the outputcolumn. + Object [] newArgs = null; + try { String outType; // Special handling for decimal because decimal types need scale and precision parameter. // This special handling should be avoided by using returnType uniformly for all cases. if (returnType != null) { outType = getNormalizedTypeName(returnType.getTypeName()).toLowerCase(); + if (outType == null) { + throw new HiveException("No vector type for type name " + returnType); + } } else { outType = ((VectorExpression) vclass.newInstance()).getOutputType(); } int outputCol = ocm.allocateOutputColumn(outType); - Object [] newArgs = Arrays.copyOf(args, numParams); + newArgs = Arrays.copyOf(args, numParams); newArgs[numParams-1] = outputCol; + ve = (VectorExpression) ctor.newInstance(newArgs); ve.setOutputType(outType); + } catch (Exception ex) { + throw new HiveException("Could not instantiate " + vclass.getSimpleName() + " with arguments " + getNewInstanceArgumentString(newArgs) + ", exception: " + + StringUtils.stringifyException(ex)); + } + } + // Add maxLength parameter to UDFs that have CHAR or VARCHAR output. 
+ if (ve instanceof TruncStringOutput) { + TruncStringOutput truncStringOutput = (TruncStringOutput) ve; + if (returnType instanceof BaseCharTypeInfo) { + BaseCharTypeInfo baseCharTypeInfo = (BaseCharTypeInfo) returnType; + truncStringOutput.setMaxLength(baseCharTypeInfo.getLength()); } - } catch (Exception ex) { - throw new HiveException("Could not instantiate " + vclass.getSimpleName(), ex); } return ve; } @@ -933,8 +1007,12 @@ private VectorExpression getGenericUdfVectorExpression(GenericUDF udf, } } else if (udf instanceof GenericUDFToDecimal) { return getCastToDecimal(childExpr, returnType); - } - + } else if (udf instanceof GenericUDFToChar) { + return getCastToChar(childExpr, returnType); + } else if (udf instanceof GenericUDFToVarchar) { + return getCastToVarChar(childExpr, returnType); + } + // Now do a general lookup Class udfClass = udf.getClass(); if (udf instanceof GenericUDFBridge) { @@ -1265,6 +1343,64 @@ private VectorExpression getCastToString(List childExpr, TypeInfo throw new HiveException("Unhandled cast input type: " + inputType); } + private VectorExpression getCastToChar(List childExpr, TypeInfo returnType) + throws HiveException { + ExprNodeDesc child = childExpr.get(0); + String inputType = childExpr.get(0).getTypeString(); + if (child instanceof ExprNodeConstantDesc) { + // Don't do constant folding here. Wait until the optimizer is changed to do it. + // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424. + return null; + } + if (inputType.equals("boolean")) { + // Boolean must come before the integer family. It's a special case. 
+ return createVectorExpression(CastBooleanToCharViaLongToChar.class, childExpr, Mode.PROJECTION, null); + } else if (isIntFamily(inputType)) { + return createVectorExpression(CastLongToChar.class, childExpr, Mode.PROJECTION, null); + } else if (isDecimalFamily(inputType)) { + return createVectorExpression(CastDecimalToChar.class, childExpr, Mode.PROJECTION, returnType); + } else if (isDateFamily(inputType)) { + return createVectorExpression(CastDateToChar.class, childExpr, Mode.PROJECTION, returnType); + } else if (isStringFamily(inputType)) { + return createVectorExpression(CastStringGroupToChar.class, childExpr, Mode.PROJECTION, returnType); + } + + /* + * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. + */ + + throw new HiveException("Unhandled cast input type: " + inputType); + } + + private VectorExpression getCastToVarChar(List childExpr, TypeInfo returnType) + throws HiveException { + ExprNodeDesc child = childExpr.get(0); + String inputType = childExpr.get(0).getTypeString(); + if (child instanceof ExprNodeConstantDesc) { + // Don't do constant folding here. Wait until the optimizer is changed to do it. + // Family of related JIRAs: HIVE-7421, HIVE-7422, and HIVE-7424. + return null; + } + if (inputType.equals("boolean")) { + // Boolean must come before the integer family. It's a special case. 
+ return createVectorExpression(CastBooleanToVarCharViaLongToVarChar.class, childExpr, Mode.PROJECTION, null); + } else if (isIntFamily(inputType)) { + return createVectorExpression(CastLongToVarChar.class, childExpr, Mode.PROJECTION, null); + } else if (isDecimalFamily(inputType)) { + return createVectorExpression(CastDecimalToVarChar.class, childExpr, Mode.PROJECTION, returnType); + } else if (isDateFamily(inputType)) { + return createVectorExpression(CastDateToVarChar.class, childExpr, Mode.PROJECTION, returnType); + } else if (isStringFamily(inputType)) { + return createVectorExpression(CastStringGroupToVarChar.class, childExpr, Mode.PROJECTION, returnType); + } + + /* + * Timestamp, float, and double types are handled by the legacy code path. See isLegacyPathUDF. + */ + + throw new HiveException("Unhandled cast input type: " + inputType); + } + private VectorExpression getCastToDoubleExpression(Class udf, List childExpr, TypeInfo returnType) throws HiveException { ExprNodeDesc child = childExpr.get(0); @@ -1304,12 +1440,12 @@ private VectorExpression getCastToBoolean(List childExpr) return getConstantVectorExpression(null, TypeInfoFactory.booleanTypeInfo, Mode.PROJECTION); } // Long and double are handled using descriptors, string needs to be specially handled. 
- if (inputType.equals("string")) { + if (isStringFamily(inputType)) { // string casts to false if it is 0 characters long, otherwise true VectorExpression lenExpr = createVectorExpression(StringLength.class, childExpr, Mode.PROJECTION, null); - int outputCol = ocm.allocateOutputColumn("integer"); + int outputCol = ocm.allocateOutputColumn("Long"); VectorExpression lenToBoolExpr = new CastLongToBooleanViaLongToLong(lenExpr.getOutputColumn(), outputCol); lenToBoolExpr.setChildExpressions(new VectorExpression[] {lenExpr}); @@ -1411,6 +1547,14 @@ private VectorExpression getBetweenFilterExpression(List childExpr cl = FilterStringColumnBetween.class; } else if (colType.equals("string") && notKeywordPresent) { cl = FilterStringColumnNotBetween.class; + } else if (varcharTypePattern.matcher(colType).matches() && !notKeywordPresent) { + cl = FilterVarCharColumnBetween.class; + } else if (varcharTypePattern.matcher(colType).matches() && notKeywordPresent) { + cl = FilterVarCharColumnNotBetween.class; + } else if (charTypePattern.matcher(colType).matches() && !notKeywordPresent) { + cl = FilterCharColumnBetween.class; + } else if (charTypePattern.matcher(colType).matches() && notKeywordPresent) { + cl = FilterCharColumnNotBetween.class; } else if (colType.equals("timestamp")) { // Get timestamp boundary values as longs instead of the expected strings @@ -1509,7 +1653,7 @@ private VectorExpression getCustomUDFExpression(ExprNodeGenericFuncDesc expr) } public static boolean isStringFamily(String resultType) { - return resultType.equalsIgnoreCase("string"); + return resultType.equalsIgnoreCase("string") || charVarcharTypePattern.matcher(resultType).matches(); } public static boolean isDatetimeFamily(String resultType) { @@ -1617,7 +1761,7 @@ private long getTimestampScalar(ExprNodeDesc expr) throws HiveException { "Non-constant argument not supported for vectorization."); } ExprNodeConstantDesc constExpr = (ExprNodeConstantDesc) expr; - if 
(constExpr.getTypeString().equals("string")) { + if (isStringFamily(constExpr.getTypeString())) { // create expression tree with type cast from string to timestamp ExprNodeGenericFuncDesc expr2 = new ExprNodeGenericFuncDesc(); @@ -1667,34 +1811,91 @@ private long evaluateCastToTimestamp(ExprNodeDesc expr) throws HiveException { } } - static String getNormalizedTypeName(String colType){ + static String getNormalizedTypeName(String colType) { String normalizedType = null; - if (colType.equalsIgnoreCase("Double") || colType.equalsIgnoreCase("Float")) { + String lower = colType.toLowerCase(); + if (lower.equals("double") || lower.equals("float")) { normalizedType = "Double"; - } else if (colType.equalsIgnoreCase("String")) { + } else if (lower.equals("string")) { normalizedType = "String"; - } else if (decimalTypePattern.matcher(colType).matches()) { + } else if (charTypePattern.matcher(lower).matches()) { + //Return the CHAR type as is, it includes maximum length. + normalizedType = colType; + } else if (varcharTypePattern.matcher(lower).matches()) { + //Return the VARCHAR type as is, it includes maximum length. + normalizedType = colType; + } else if (decimalTypePattern.matcher(lower).matches()) { //Return the decimal type as is, it includes scale and precision. normalizedType = colType; - } else { + } else if (lower.equals("tinyint") || + lower.equals("smallint") || + lower.equals("int") || + lower.equals("bigint") || + lower.equals("boolean") || + lower.equals("long") || + lower.equals("timestamp") || + lower.equals("date")) { normalizedType = "Long"; + } else if (lower.equals("void")) { + // The old code defaulted to Long... 
+ normalizedType = "Long"; + } else { + normalizedType = null; } return normalizedType; } + + static String getUndecoratedTypeName(String colType) throws HiveException { + String undecoratedType = null; + String lower = colType.toLowerCase(); + if (lower.equals("double") || lower.equals("float")) { + undecoratedType = "Double"; + } else if (lower.equals("string")) { + undecoratedType = "String"; + } else if (charTypePattern.matcher(lower).matches()) { + //Return the CHAR type without maximum length. + undecoratedType = "Char"; + } else if (varcharTypePattern.matcher(lower).matches()) { + //Return the VARCHAR type without. + undecoratedType = "VarChar"; + } else if (decimalTypePattern.matcher(lower).matches()) { + //Return the decimal type without scale and precision. + undecoratedType = "Decimal"; + } else if (lower.equals("tinyint") || + lower.equals("smallint") || + lower.equals("int") || + lower.equals("bigint") || + lower.equals("boolean") || + lower.equals("long") || + lower.equals("timestamp") || + lower.equals("date")) { + undecoratedType = "Long"; + } else { + undecoratedType = null; + } + return undecoratedType; + } + static Object[][] aggregatesDefinition = { {"min", "Long", VectorUDAFMinLong.class}, {"min", "Double", VectorUDAFMinDouble.class}, {"min", "String", VectorUDAFMinString.class}, + {"min", "Char", VectorUDAFMinString.class}, + {"min", "VarChar", VectorUDAFMinString.class}, {"min", "Decimal",VectorUDAFMinDecimal.class}, {"max", "Long", VectorUDAFMaxLong.class}, {"max", "Double", VectorUDAFMaxDouble.class}, {"max", "String", VectorUDAFMaxString.class}, + {"max", "Char", VectorUDAFMaxString.class}, + {"max", "VarChar", VectorUDAFMaxString.class}, {"max", "Decimal",VectorUDAFMaxDecimal.class}, {"count", null, VectorUDAFCountStar.class}, {"count", "Long", VectorUDAFCount.class}, {"count", "Double", VectorUDAFCount.class}, {"count", "String", VectorUDAFCount.class}, + {"count", "Char", VectorUDAFCount.class}, + {"count", "VarChar", 
VectorUDAFCount.class}, {"count", "Decimal",VectorUDAFCount.class}, {"sum", "Long", VectorUDAFSumLong.class}, {"sum", "Double", VectorUDAFSumDouble.class}, @@ -1741,10 +1942,7 @@ public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc) if (paramDescList.size() > 0) { ExprNodeDesc inputExpr = paramDescList.get(0); - inputType = getNormalizedTypeName(inputExpr.getTypeString()); - if (decimalTypePattern.matcher(inputType).matches()) { - inputType = "Decimal"; - } + inputType = getUndecoratedTypeName(inputExpr.getTypeString()); } for (Object[] aggDef : aggregatesDefinition) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 16454e7..1e24710 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -23,11 +23,16 @@ import java.util.LinkedList; import java.util.List; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -126,6 +131,8 @@ private static void allocateColumnVector(StructObjectInspector oi, break; case BINARY: case STRING: + case CHAR: + case VARCHAR: cvList.add(new 
BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE)); break; case DECIMAL: @@ -375,6 +382,51 @@ public static void addRowToBatchFrom(Object row, StructObjectInspector oi, } } break; + case CHAR: { + BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i]; + if (writableCol != null) { + bcv.isNull[rowIndex] = false; + HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar(); + byte[] bytes = colHiveChar.getStrippedValue().getBytes(); + + // We assume the CHAR maximum length was enforced when the object was created. + int length = bytes.length; + + int start = buffer.getLength(); + try { + // In vector mode, we store CHAR as unpadded. + buffer.write(bytes, 0, length); + } catch (IOException ioe) { + throw new IllegalStateException("bad write", ioe); + } + bcv.setRef(rowIndex, buffer.getData(), start, length); + } else { + setNullColIsNullValue(bcv, rowIndex); + } + } + break; + case VARCHAR: { + BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i]; + if (writableCol != null) { + bcv.isNull[rowIndex] = false; + HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar(); + byte[] bytes = colHiveVarchar.getValue().getBytes(); + + // We assume the VARCHAR maximum length was enforced when the object was created. 
+ int length = bytes.length; + + int start = buffer.getLength(); + try { + buffer.write(bytes, 0, length); + } catch (IOException ioe) { + throw new IllegalStateException("bad write", ioe); + } + bcv.setRef(rowIndex, buffer.getData(), start, length); + } else { + setNullColIsNullValue(bcv, rowIndex); + } + } + break; case DECIMAL: DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[off + i]; if (writableCol != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java index 9669c91..5ce7553 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java @@ -158,7 +158,10 @@ public Writable serializeVector(VectorizedRowBatch vrg, ObjectInspector objInspe serializeVectorStream.write(bytes, 0, bytes.length); } break; - case STRING: { + case STRING: + case CHAR: + case VARCHAR: { + // Is it correct to escape CHAR and VARCHAR? 
BytesColumnVector bcv = (BytesColumnVector) batch.cols[k]; LazyUtils.writeEscaped(serializeVectorStream, bcv.vector[rowIndex], bcv.start[rowIndex], diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 2536817..193f503 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -278,7 +278,7 @@ public VectorizedRowBatch createVectorizedRowBatch() throws HiveException case PRIMITIVE: { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi; // Vectorization currently only supports the following data types: - // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, BINARY, STRING, TIMESTAMP, + // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, BINARY, STRING, CHAR, VARCHAR, TIMESTAMP, // DATE and DECIMAL switch (poi.getPrimitiveCategory()) { case BOOLEAN: @@ -296,6 +296,8 @@ public VectorizedRowBatch createVectorizedRowBatch() throws HiveException break; case BINARY: case STRING: + case CHAR: + case VARCHAR: result.cols[j] = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); break; case DECIMAL: @@ -544,7 +546,9 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveExcepti } break; - case STRING: { + case STRING: + case CHAR: + case VARCHAR: { BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex]; String sVal = (String) value; if (sVal == null) { @@ -566,13 +570,17 @@ public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveExcepti } } - private void addScratchColumnsToBatch(VectorizedRowBatch vrb) { + private void addScratchColumnsToBatch(VectorizedRowBatch vrb) throws HiveException { if (columnTypeMap != null && !columnTypeMap.isEmpty()) { int origNumCols = vrb.numCols; int newNumCols = vrb.cols.length+columnTypeMap.keySet().size(); vrb.cols = Arrays.copyOf(vrb.cols, newNumCols); for 
(int i = origNumCols; i < newNumCols; i++) { - vrb.cols[i] = allocateColumnVector(columnTypeMap.get(i), + String typeName = columnTypeMap.get(i); + if (typeName == null) { + throw new HiveException("No type found for column type entry " + i); + } + vrb.cols[i] = allocateColumnVector(typeName, VectorizedRowBatch.DEFAULT_SIZE); } vrb.numCols = vrb.cols.length; @@ -599,13 +607,15 @@ private void addScratchColumnsToBatch(VectorizedRowBatch vrb) { private ColumnVector allocateColumnVector(String type, int defaultSize) { if (type.equalsIgnoreCase("double")) { return new DoubleColumnVector(defaultSize); - } else if (type.equalsIgnoreCase("string")) { + } else if (VectorizationContext.isStringFamily(type)) { return new BytesColumnVector(defaultSize); } else if (VectorizationContext.decimalTypePattern.matcher(type).matches()){ int [] precisionScale = getScalePrecisionFromDecimalType(type); return new DecimalColumnVector(defaultSize, precisionScale[0], precisionScale[1]); - } else { + } else if (type.equalsIgnoreCase("long")) { return new LongColumnVector(defaultSize); + } else { + return null; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java index 37ef8fa..eca239b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/AbstractFilterStringColLikeStringScalar.java @@ -420,8 +420,8 @@ public void setPattern(String pattern) { VectorExpressionDescriptor.Mode.FILTER) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string")) + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) .setInputExpressionTypes( 
VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToCharViaLongToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToCharViaLongToChar.java new file mode 100644 index 0000000..57dc92b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToCharViaLongToChar.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; + +public class CastBooleanToCharViaLongToChar extends CastBooleanToStringViaLongToString implements TruncStringOutput { + private static final long serialVersionUID = 1L; + private int maxLength; // Must be manually set with setMaxLength. 
+ + public CastBooleanToCharViaLongToChar(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + public CastBooleanToCharViaLongToChar() { + super(); + } + + @Override + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + StringExpr.rightTrimAndTruncate(outV, i, bytes, 0, length, maxLength); + } + + @Override + public String getOutputType() { + return "Char"; + } + + @Override + public int getMaxLength() { + return maxLength; + } + + @Override + public void setMaxLength(int maxLength) { + this.maxLength = maxLength; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java index d16bbb1..d13a896 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToStringViaLongToString.java @@ -22,17 +22,18 @@ public class CastBooleanToStringViaLongToString extends LongToStringUnaryUDF { private static final long serialVersionUID = 1L; - private transient byte[] temp; // space to put date string private static final byte[][] dictionary = { {'F', 'A', 'L', 'S', 'E'}, {'T', 'R', 'U', 'E'} }; + public CastBooleanToStringViaLongToString(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + public CastBooleanToStringViaLongToString() { super(); - temp = new byte[8]; } - public CastBooleanToStringViaLongToString(int inputColumn, int outputColumn) { - super(inputColumn, outputColumn); - temp = new byte[8]; + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + outV.setVal(i, bytes, 0, length); } @Override @@ -41,6 +42,6 @@ protected void func(BytesColumnVector outV, long[] vector, int i) { /* 0 is false and 1 is true in the input vector, so a simple dictionary is used * with two 
entries. 0 references FALSE and 1 references TRUE in the dictionary. */ - outV.setVal(i, dictionary[(int) vector[i]], 0, dictionary[(int) vector[i]].length); + assign(outV, i, dictionary[(int) vector[i]], dictionary[(int) vector[i]].length); } -} +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToVarCharViaLongToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToVarCharViaLongToVarChar.java new file mode 100644 index 0000000..1f7697e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastBooleanToVarCharViaLongToVarChar.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; + +public class CastBooleanToVarCharViaLongToVarChar extends CastBooleanToStringViaLongToString implements TruncStringOutput { + private static final long serialVersionUID = 1L; + private int maxLength; // Must be manually set with setMaxLength. 
+ + public CastBooleanToVarCharViaLongToVarChar(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + public CastBooleanToVarCharViaLongToVarChar() { + super(); + } + + @Override + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + StringExpr.truncate(outV, i, bytes, 0, length, maxLength); + } + + @Override + public String getOutputType() { + return "VarChar"; + } + + @Override + public int getMaxLength() { + return maxLength; + } + + @Override + public void setMaxLength(int maxLength) { + this.maxLength = maxLength; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToChar.java new file mode 100644 index 0000000..187f12b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToChar.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; + +public class CastDateToChar extends CastDateToString implements TruncStringOutput { + private static final long serialVersionUID = 1L; + private int maxLength; // Must be manually set with setMaxLength. + + public CastDateToChar() { + super(); + } + + public CastDateToChar(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + StringExpr.rightTrimAndTruncate(outV, i, bytes, 0, length, maxLength); + } + + @Override + public String getOutputType() { + return "Char"; + } + + @Override + public int getMaxLength() { + return maxLength; + } + + @Override + public void setMaxLength(int maxLength) { + this.maxLength = maxLength; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java index 39334b8..00a974f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToString.java @@ -35,10 +35,15 @@ public CastDateToString(int inputColumn, int outputColumn) { super(inputColumn, outputColumn); } + // The assign method will be overridden for CHAR and VARCHAR. 
+ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + outV.setVal(i, bytes, 0, length); + } + @Override protected void func(BytesColumnVector outV, long[] vector, int i) { dt.setTime(DateWritable.daysToMillis((int) vector[i])); byte[] temp = dt.toString().getBytes(); - outV.setVal(i, temp, 0, temp.length); + assign(outV, i, temp, temp.length); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarChar.java new file mode 100644 index 0000000..5ad745c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDateToVarChar.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; + +public class CastDateToVarChar extends CastDateToString implements TruncStringOutput { + private static final long serialVersionUID = 1L; + private int maxLength; // Must be manually set with setMaxLength. 
+ + public CastDateToVarChar() { + super(); + } + + public CastDateToVarChar(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + StringExpr.truncate(outV, i, bytes, 0, length, maxLength); + } + + @Override + public String getOutputType() { + return "VarChar"; + } + + @Override + public int getMaxLength() { + return maxLength; + } + + @Override + public void setMaxLength(int maxLength) { + this.maxLength = maxLength; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java new file mode 100644 index 0000000..aab3e70 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToChar.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; + +/** + * To support vectorized cast of decimal to string. 
+ */ +public class CastDecimalToChar extends CastDecimalToString implements TruncStringOutput { + + private static final long serialVersionUID = 1L; + private int maxLength; // Must be manually set with setMaxLength. + + public CastDecimalToChar() { + super(); + } + + public CastDecimalToChar(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + StringExpr.rightTrimAndTruncate(outV, i, bytes, 0, length, maxLength); + } + + @Override + public String getOutputType() { + return "Char"; + } + + @Override + public int getMaxLength() { + return maxLength; + } + + @Override + public void setMaxLength(int maxLength) { + this.maxLength = maxLength; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java index a436fa8..fa0143f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToDecimal.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java index c0a99b7..6d01498 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToString.java @@ -36,6 +36,11 @@ public CastDecimalToString(int inputColumn, int 
outputColumn) { super(inputColumn, outputColumn); } + // The assign method will be overridden for CHAR and VARCHAR. + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + outV.setVal(i, bytes, 0, length); + } + @Override protected void func(BytesColumnVector outV, DecimalColumnVector inV, int i) { String s = inV.vector[i].getHiveDecimalString(); @@ -47,6 +52,6 @@ protected void func(BytesColumnVector outV, DecimalColumnVector inV, int i) { // This should never happen. If it does, there is a bug. throw new RuntimeException("Internal error: unable to convert decimal to string"); } - outV.setVal(i, b, 0, b.length); + assign(outV, i, b, b.length); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java new file mode 100644 index 0000000..267b0b1 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastDecimalToVarChar.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; + +/** + * To support vectorized cast of decimal to string. + */ +public class CastDecimalToVarChar extends CastDecimalToString implements TruncStringOutput { + + private static final long serialVersionUID = 1L; + private int maxLength; // Must be manually set with setMaxLength. + + public CastDecimalToVarChar() { + super(); + } + + public CastDecimalToVarChar(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + StringExpr.truncate(outV, i, bytes, 0, length, maxLength); + } + + @Override + public String getOutputType() { + return "VarChar"; + } + + @Override + public int getMaxLength() { + return maxLength; + } + + @Override + public void setMaxLength(int maxLength) { + this.maxLength = maxLength; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToChar.java new file mode 100644 index 0000000..27674c4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToChar.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; + +public class CastLongToChar extends CastLongToString implements TruncStringOutput { + private static final long serialVersionUID = 1L; + private int maxLength; // Must be manually set with setMaxLength. + + public CastLongToChar() { + super(); + } + + public CastLongToChar(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + StringExpr.rightTrimAndTruncate(outV, i, bytes, 0, length, maxLength); + } + + @Override + public String getOutputType() { + return "Char"; + } + + @Override + public int getMaxLength() { + return maxLength; + } + + @Override + public void setMaxLength(int maxLength) { + this.maxLength = maxLength; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToString.java index 43bdfc2..cdfc387 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToString.java @@ -34,9 +34,14 @@ public CastLongToString(int inputColumn, int outputColumn) { temp = new byte[20]; } + // The assign method will be overridden for CHAR and VARCHAR. 
+ protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + outV.setVal(i, bytes, 0, length); + } + @Override protected void func(BytesColumnVector outV, long[] vector, int i) { int len = MathExpr.writeLongToUTF8(temp, vector[i]); - outV.setVal(i, temp, 0, len); + assign(outV, i, temp, len); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToVarChar.java new file mode 100644 index 0000000..7c3dca2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastLongToVarChar.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; + +public class CastLongToVarChar extends CastLongToString implements TruncStringOutput { + private static final long serialVersionUID = 1L; + private int maxLength; // Must be manually set with setMaxLength. 
+ + public CastLongToVarChar() { + super(); + } + + public CastLongToVarChar(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void assign(BytesColumnVector outV, int i, byte[] bytes, int length) { + StringExpr.truncate(outV, i, bytes, 0, length, maxLength); + } + + @Override + public String getOutputType() { + return "VarChar"; + } + + @Override + public int getMaxLength() { + return maxLength; + } + + @Override + public void setMaxLength(int maxLength) { + this.maxLength = maxLength; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToChar.java new file mode 100644 index 0000000..7c06ff5 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToChar.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; + +public class CastStringGroupToChar extends StringUnaryUDFDirect implements TruncStringOutput { + private static final long serialVersionUID = 1L; + private int maxLength; // Must be manually set with setMaxLength. + + public CastStringGroupToChar(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + public CastStringGroupToChar() { + super(); + } + + /** + * Do right trim and truncate for CHAR. + */ + protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) { + StringExpr.rightTrimAndTruncate(outV, i, vector[i], start[i], length[i], maxLength); + } + @Override + public String getOutputType() { + return "Char"; + } + + @Override + public int getMaxLength() { + return maxLength; + } + + @Override + public void setMaxLength(int maxLength) { + this.maxLength = maxLength; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToVarChar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToVarChar.java new file mode 100644 index 0000000..376ce92 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringGroupToVarChar.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; + +public class CastStringGroupToVarChar extends StringUnaryUDFDirect implements TruncStringOutput { + private static final long serialVersionUID = 1L; + private int maxLength; // Must be manually set with setMaxLength. + + public CastStringGroupToVarChar(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + public CastStringGroupToVarChar() { + super(); + } + + /** + * Do right trim and truncate for CHAR. + */ + protected void func(BytesColumnVector outV, byte[][] vector, int[] start, int[] length, int i) { + StringExpr.truncate(outV, i, vector[i], start[i], length[i], maxLength); + } + @Override + public String getOutputType() { + return "VarChar"; + } + + @Override + public int getMaxLength() { + return maxLength; + } + + @Override + public void setMaxLength(int maxLength) { + this.maxLength = maxLength; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java index 1d07615..e3e860c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java @@ -154,7 +154,7 @@ public String getOutputType() { b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(1) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.STRING) + 
VectorExpressionDescriptor.ArgumentType.STRING_GROUP) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java index 7317141..b4ac2b1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDecimal.java @@ -159,7 +159,7 @@ public void setInputColumn(int inputColumn) { b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(1) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.STRING) + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java new file mode 100644 index 0000000..4c7313c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CharScalarConcatStringGroupCol.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Vectorized instruction to concatenate a scalar to a string column and put + * the result in an output column. + */ +public class CharScalarConcatStringGroupCol extends StringScalarConcatStringGroupCol { + private static final long serialVersionUID = 1L; + + public CharScalarConcatStringGroupCol(HiveChar value, int colNum, int outputColumn) { + super(value.getStrippedValue().getBytes(), colNum, outputColumn); + } + + public CharScalarConcatStringGroupCol() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.CHAR, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java index 0e41a7c..4b1182c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/DecimalToStringUnaryUDF.java @@ -130,7 +130,7 @@ public void setInputColumn(int inputColumn) { @Override public String getOutputType() { - return "Decimal"; + return "String"; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCharScalarStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCharScalarStringGroupColumn.java new file mode 100644 index 0000000..bc5fb5a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCharScalarStringGroupColumn.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.HiveChar; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a string scalar. + * The third is a string column or non-constant expression result. 
+ */ +public class IfExprCharScalarStringGroupColumn extends IfExprStringScalarStringGroupColumn { + + private static final long serialVersionUID = 1L; + + public IfExprCharScalarStringGroupColumn(int arg1Column, HiveChar arg2Scalar, int arg3Column, int outputColumn) { + super(arg1Column, arg2Scalar.getValue().getBytes(), arg3Column, outputColumn); + } + + public IfExprCharScalarStringGroupColumn() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.LONG, + VectorExpressionDescriptor.ArgumentType.CHAR, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCharScalarStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCharScalarStringScalar.java new file mode 100644 index 0000000..0fccf15 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprCharScalarStringScalar.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.HiveChar; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a string scalar. + * The third is a string scalar. + */ +public class IfExprCharScalarStringScalar extends IfExprStringScalarStringScalar { + + private static final long serialVersionUID = 1L; + + public IfExprCharScalarStringScalar( + int arg1Column, HiveChar arg2Scalar, byte[] arg3Scalar, int outputColumn) { + super(arg1Column, arg2Scalar.getValue().getBytes(), arg3Scalar, outputColumn); + } + + public IfExprCharScalarStringScalar() { + } + + @Override + public String getOutputType() { + return "String"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.LONG, + VectorExpressionDescriptor.ArgumentType.CHAR, + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringColumn.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringColumn.java deleted file mode 100644 index 7cab0e5..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringColumn.java +++ /dev/null @@ -1,221 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * Compute IF(expr1, expr2, expr3) for 3 input column expressions. - * The first is always a boolean (LongColumnVector). - * The second and third are string columns or string expression results. 
- */ -public class IfExprStringColumnStringColumn extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int arg1Column, arg2Column, arg3Column; - private int outputColumn; - - public IfExprStringColumnStringColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { - this.arg1Column = arg1Column; - this.arg2Column = arg2Column; - this.arg3Column = arg3Column; - this.outputColumn = outputColumn; - } - - public IfExprStringColumnStringColumn() { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; - BytesColumnVector arg2ColVector = (BytesColumnVector) batch.cols[arg2Column]; - BytesColumnVector arg3ColVector = (BytesColumnVector) batch.cols[arg3Column]; - BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later - int n = batch.size; - long[] vector1 = arg1ColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.initBuffer(); - - /* All the code paths below propagate nulls even if neither arg2 nor arg3 - * have nulls. This is to reduce the number of code paths and shorten the - * code, at the expense of maybe doing unnecessary work if neither input - * has nulls. This could be improved in the future by expanding the number - * of code paths. 
- */ - if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { - arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); - } else { - arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); - } - return; - } - - // extend any repeating values and noNulls indicator in the inputs - arg2ColVector.flatten(batch.selectedInUse, sel, n); - arg3ColVector.flatten(batch.selectedInUse, sel, n); - - if (arg1ColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (vector1[i] == 1) { - if (!arg2ColVector.isNull[i]) { - outputColVector.setVal( - i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); - } - } else { - if (!arg3ColVector.isNull[i]) { - outputColVector.setVal( - i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); - } - } - outputIsNull[i] = (vector1[i] == 1 ? - arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); - } - } else { - for(int i = 0; i != n; i++) { - if (vector1[i] == 1) { - if (!arg2ColVector.isNull[i]) { - outputColVector.setVal( - i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); - } - } else { - if (!arg3ColVector.isNull[i]) { - outputColVector.setVal( - i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); - } - } - outputIsNull[i] = (vector1[i] == 1 ? - arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); - } - } - } else /* there are nulls */ { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!arg1ColVector.isNull[i] && vector1[i] == 1) { - if (!arg2ColVector.isNull[i]) { - outputColVector.setVal( - i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); - } - } else { - if (!arg3ColVector.isNull[i]) { - outputColVector.setVal( - i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); - } - } - outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
- arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); - } - } else { - for(int i = 0; i != n; i++) { - if (!arg1ColVector.isNull[i] && vector1[i] == 1) { - if (!arg2ColVector.isNull[i]) { - outputColVector.setVal( - i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); - } - } else { - if (!arg3ColVector.isNull[i]) { - outputColVector.setVal( - i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); - } - } - outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? - arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); - } - } - } - arg2ColVector.unFlatten(); - arg3ColVector.unFlatten(); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return "String"; - } - - public int getArg1Column() { - return arg1Column; - } - - public void setArg1Column(int colNum) { - this.arg1Column = colNum; - } - - public int getArg2Column() { - return arg2Column; - } - - public void setArg2Column(int colNum) { - this.arg2Column = colNum; - } - - public int getArg3Column() { - return arg3Column; - } - - public void setArg3Column(int colNum) { - this.arg3Column = colNum; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(3) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("long"), - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringScalar.java deleted file mode 100644 index 159d2a2..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringColumnStringScalar.java +++ /dev/null @@ -1,208 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * Compute IF(expr1, expr2, expr3) for 3 input expressions. - * The first is always a boolean (LongColumnVector). - * The second is a string column expression. - * The third is a string scalar. 
- */ -public class IfExprStringColumnStringScalar extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int arg1Column, arg2Column; - private byte[] arg3Scalar; - private int outputColumn; - - public IfExprStringColumnStringScalar(int arg1Column, int arg2Column, byte[] arg3Scalar, int outputColumn) { - this.arg1Column = arg1Column; - this.arg2Column = arg2Column; - this.arg3Scalar = arg3Scalar; - this.outputColumn = outputColumn; - } - - public IfExprStringColumnStringScalar() { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; - BytesColumnVector arg2ColVector = (BytesColumnVector) batch.cols[arg2Column]; - BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg2ColVector.noNulls; - outputColVector.isRepeating = false; // may override later - int n = batch.size; - long[] vector1 = arg1ColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.initBuffer(); - - /* All the code paths below propagate nulls even if arg2 has no nulls. - * This is to reduce the number of code paths and shorten the - * code, at the expense of maybe doing unnecessary work if neither input - * has nulls. This could be improved in the future by expanding the number - * of code paths. 
- */ - if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { - arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); - } else { - outputColVector.fill(arg3Scalar); - } - return; - } - - // extend any repeating values and noNulls indicator in the inputs - arg2ColVector.flatten(batch.selectedInUse, sel, n); - - if (arg1ColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (vector1[i] == 1) { - if (!arg2ColVector.isNull[i]) { - outputColVector.setVal( - i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); - } - } else { - outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); - } - outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false); - } - } else { - for(int i = 0; i != n; i++) { - if (vector1[i] == 1) { - if (!arg2ColVector.isNull[i]) { - outputColVector.setVal( - i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); - } - } else { - outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); - } - outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false); - } - } - } else /* there are nulls */ { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!arg1ColVector.isNull[i] && vector1[i] == 1) { - if (!arg2ColVector.isNull[i]) { - outputColVector.setVal( - i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); - } - } else { - outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); - } - outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
- arg2ColVector.isNull[i] : false); - } - } else { - for(int i = 0; i != n; i++) { - if (!arg1ColVector.isNull[i] && vector1[i] == 1) { - if (!arg2ColVector.isNull[i]) { - outputColVector.setVal( - i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); - } - } else { - outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); - } - outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? - arg2ColVector.isNull[i] : false); - } - } - } - - // restore state of repeating and non nulls indicators - arg2ColVector.unFlatten(); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return "String"; - } - - public int getArg1Column() { - return arg1Column; - } - - public void setArg1Column(int colNum) { - this.arg1Column = colNum; - } - - public int getArg2Column() { - return arg2Column; - } - - public void setArg2Column(int colNum) { - this.arg2Column = colNum; - } - - public byte[] getArg3Scalar() { - return arg3Scalar; - } - - public void setArg3Scalar(byte[] value) { - this.arg3Scalar = value; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(3) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("long"), - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnCharScalar.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnCharScalar.java new file mode 100644 index 0000000..46762f4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnCharScalar.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input expressions. + * The first is always a boolean (LongColumnVector). + * The second is a string column expression. + * The third is a string scalar. 
+ */ +public class IfExprStringGroupColumnCharScalar extends IfExprStringGroupColumnStringScalar { + + private static final long serialVersionUID = 1L; + + public IfExprStringGroupColumnCharScalar(int arg1Column, int arg2Column, HiveChar arg3Scalar, int outputColumn) { + super(arg1Column, arg2Column, arg3Scalar.getValue().getBytes(), outputColumn); + } + + public IfExprStringGroupColumnCharScalar() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.LONG, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, + VectorExpressionDescriptor.ArgumentType.CHAR) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java new file mode 100644 index 0000000..c0c639b --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringGroupColumn.java @@ -0,0 +1,194 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second and third are string columns or string expression results. 
+ */ +public class IfExprStringGroupColumnStringGroupColumn extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg2Column, arg3Column; + private int outputColumn; + + public IfExprStringGroupColumnStringGroupColumn(int arg1Column, int arg2Column, int arg3Column, int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Column = arg3Column; + this.outputColumn = outputColumn; + } + + public IfExprStringGroupColumnStringGroupColumn() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + BytesColumnVector arg2ColVector = (BytesColumnVector) batch.cols[arg2Column]; + BytesColumnVector arg3ColVector = (BytesColumnVector) batch.cols[arg3Column]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg2ColVector.noNulls && arg3ColVector.noNulls; + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.initBuffer(); + + /* All the code paths below propagate nulls even if neither arg2 nor arg3 + * have nulls. This is to reduce the number of code paths and shorten the + * code, at the expense of maybe doing unnecessary work if neither input + * has nulls. This could be improved in the future by expanding the number + * of code paths. 
+ */ + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } else { + arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } + return; + } + + // extend any repeating values and noNulls indicator in the inputs + arg2ColVector.flatten(batch.selectedInUse, sel, n); + arg3ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + if (!arg2ColVector.isNull[i]) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } + } else { + if (!arg3ColVector.isNull[i]) { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + } + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + if (vector1[i] == 1) { + if (!arg2ColVector.isNull[i]) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } + } else { + if (!arg3ColVector.isNull[i]) { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + } + outputIsNull[i] = (vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + if (!arg2ColVector.isNull[i]) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } + } else { + if (!arg3ColVector.isNull[i]) { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + if (!arg2ColVector.isNull[i]) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } + } else { + if (!arg3ColVector.isNull[i]) { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : arg3ColVector.isNull[i]); + } + } + } + arg2ColVector.unFlatten(); + arg3ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "String"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.LONG, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java new file mode 100644 index 0000000..744bfa5 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnStringScalar.java @@ -0,0 +1,181 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input expressions. + * The first is always a boolean (LongColumnVector). + * The second is a string column expression. + * The third is a string scalar. 
+ */ +public class IfExprStringGroupColumnStringScalar extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg2Column; + private byte[] arg3Scalar; + private int outputColumn; + + public IfExprStringGroupColumnStringScalar(int arg1Column, int arg2Column, byte[] arg3Scalar, int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Column = arg2Column; + this.arg3Scalar = arg3Scalar; + this.outputColumn = outputColumn; + } + + public IfExprStringGroupColumnStringScalar() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + BytesColumnVector arg2ColVector = (BytesColumnVector) batch.cols[arg2Column]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg2ColVector.noNulls; + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.initBuffer(); + + /* All the code paths below propagate nulls even if arg2 has no nulls. + * This is to reduce the number of code paths and shorten the + * code, at the expense of maybe doing unnecessary work if neither input + * has nulls. This could be improved in the future by expanding the number + * of code paths. 
+ */ + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + arg2ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } else { + outputColVector.fill(arg3Scalar); + } + return; + } + + // extend any repeating values and noNulls indicator in the inputs + arg2ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + if (!arg2ColVector.isNull[i]) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false); + } + } else { + for(int i = 0; i != n; i++) { + if (vector1[i] == 1) { + if (!arg2ColVector.isNull[i]) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + outputIsNull[i] = (vector1[i] == 1 ? arg2ColVector.isNull[i] : false); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + if (!arg2ColVector.isNull[i]) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ arg2ColVector.isNull[i] : false); + } + } else { + for(int i = 0; i != n; i++) { + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + if (!arg2ColVector.isNull[i]) { + outputColVector.setVal( + i, arg2ColVector.vector[i], arg2ColVector.start[i], arg2ColVector.length[i]); + } + } else { + outputColVector.setRef(i, arg3Scalar, 0, arg3Scalar.length); + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + arg2ColVector.isNull[i] : false); + } + } + } + + // restore state of repeating and non nulls indicators + arg2ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "String"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.LONG, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnVarCharScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnVarCharScalar.java new file mode 100644 index 0000000..f0bf76e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringGroupColumnVarCharScalar.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
 */

package org.apache.hadoop.hive.ql.exec.vector.expressions;

import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;

/**
 * Compute IF(expr1, expr2, expr3) for 3 input expressions.
 * The first is always a boolean (LongColumnVector).
 * The second is a string-group column expression.
 * The third is a varchar scalar; evaluation is delegated to the
 * byte[]-scalar logic of the parent class.
 */
public class IfExprStringGroupColumnVarCharScalar extends IfExprStringGroupColumnStringScalar {

  private static final long serialVersionUID = 1L;

  public IfExprStringGroupColumnVarCharScalar(int arg1Column, int arg2Column, HiveVarchar arg3Scalar, int outputColumn) {
    // Delegate to the string-scalar parent using the varchar value's bytes.
    super(arg1Column, arg2Column, arg3Scalar.getValue().getBytes(), outputColumn);
  }

  public IfExprStringGroupColumnVarCharScalar() {
    super();
  }

  @Override
  public VectorExpressionDescriptor.Descriptor getDescriptor() {
    // Same as the parent except the third argument must be a VARCHAR scalar.
    return (new VectorExpressionDescriptor.Builder())
        .setMode(
            VectorExpressionDescriptor.Mode.PROJECTION)
        .setNumArguments(3)
        .setArgumentTypes(
            VectorExpressionDescriptor.ArgumentType.LONG,
            VectorExpressionDescriptor.ArgumentType.STRING_GROUP,
            VectorExpressionDescriptor.ArgumentType.VARCHAR)
        .setInputExpressionTypes(
            VectorExpressionDescriptor.InputExpressionType.COLUMN,
            VectorExpressionDescriptor.InputExpressionType.COLUMN,
            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
  }
}
+ */ +public class IfExprStringScalarCharScalar extends IfExprStringScalarStringScalar { + + private static final long serialVersionUID = 1L; + + public IfExprStringScalarCharScalar( + int arg1Column, byte[] arg2Scalar, HiveChar arg3Scalar, int outputColumn) { + super(arg1Column, arg2Scalar, arg3Scalar.getValue().getBytes(), outputColumn); + } + + public IfExprStringScalarCharScalar() { + } + + @Override + public String getOutputType() { + return "String"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.LONG, + VectorExpressionDescriptor.ArgumentType.STRING, + VectorExpressionDescriptor.ArgumentType.CHAR) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringColumn.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringColumn.java deleted file mode 100644 index 562db72..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringColumn.java +++ /dev/null @@ -1,208 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; -import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; - -/** - * Compute IF(expr1, expr2, expr3) for 3 input column expressions. - * The first is always a boolean (LongColumnVector). - * The second is a string scalar. - * The third is a string column or non-constant expression result. 
- */ -public class IfExprStringScalarStringColumn extends VectorExpression { - - private static final long serialVersionUID = 1L; - - private int arg1Column, arg3Column; - private byte[] arg2Scalar; - private int outputColumn; - - public IfExprStringScalarStringColumn(int arg1Column, byte[] arg2Scalar, int arg3Column, int outputColumn) { - this.arg1Column = arg1Column; - this.arg2Scalar = arg2Scalar; - this.arg3Column = arg3Column; - this.outputColumn = outputColumn; - } - - public IfExprStringScalarStringColumn() { - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; - BytesColumnVector arg3ColVector = (BytesColumnVector) batch.cols[arg3Column]; - BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn]; - int[] sel = batch.selected; - boolean[] outputIsNull = outputColVector.isNull; - outputColVector.noNulls = arg3ColVector.noNulls; - outputColVector.isRepeating = false; // may override later - int n = batch.size; - long[] vector1 = arg1ColVector.vector; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - outputColVector.initBuffer(); - - /* All the code paths below propagate nulls even arg3 has no - * nulls. This is to reduce the number of code paths and shorten the - * code, at the expense of maybe doing unnecessary work if neither input - * has nulls. This could be improved in the future by expanding the number - * of code paths. 
- */ - if (arg1ColVector.isRepeating) { - if (vector1[0] == 1) { - outputColVector.fill(arg2Scalar); - } else { - arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); - } - return; - } - - // extend any repeating values and noNulls indicator in the input - arg3ColVector.flatten(batch.selectedInUse, sel, n); - - if (arg1ColVector.noNulls) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (vector1[i] == 1) { - outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); - } else { - if (!arg3ColVector.isNull[i]) { - outputColVector.setVal( - i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); - } - } - outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]); - } - } else { - for(int i = 0; i != n; i++) { - if (vector1[i] == 1) { - outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); - } else { - if (!arg3ColVector.isNull[i]) { - outputColVector.setVal( - i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); - } - } - outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]); - } - } - } else /* there are nulls */ { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!arg1ColVector.isNull[i] && vector1[i] == 1) { - outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); - } else { - if (!arg3ColVector.isNull[i]) { - outputColVector.setVal( - i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); - } - } - outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
- false : arg3ColVector.isNull[i]); - } - } else { - for(int i = 0; i != n; i++) { - if (!arg1ColVector.isNull[i] && vector1[i] == 1) { - outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); - } else { - if (!arg3ColVector.isNull[i]) { - outputColVector.setVal( - i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); - } - } - outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? - false : arg3ColVector.isNull[i]); - } - } - } - - // restore state of repeating and non nulls indicators - arg3ColVector.unFlatten(); - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return "String"; - } - - public int getArg1Column() { - return arg1Column; - } - - public void setArg1Column(int colNum) { - this.arg1Column = colNum; - } - - public byte[] getArg2Scalar() { - return arg2Scalar; - } - - public void setArg2Scalar(byte[] value) { - this.arg2Scalar = value; - } - - public int getArg3Column() { - return arg3Column; - } - - public void setArg3Column(int colNum) { - this.arg3Column = colNum; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(3) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("long"), - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string")) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.SCALAR, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java new file mode 100644 index 0000000..151ac3e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringGroupColumn.java @@ -0,0 +1,181 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a string scalar. + * The third is a string column or non-constant expression result. 
+ */ +public class IfExprStringScalarStringGroupColumn extends VectorExpression { + + private static final long serialVersionUID = 1L; + + private int arg1Column, arg3Column; + private byte[] arg2Scalar; + private int outputColumn; + + public IfExprStringScalarStringGroupColumn(int arg1Column, byte[] arg2Scalar, int arg3Column, int outputColumn) { + this.arg1Column = arg1Column; + this.arg2Scalar = arg2Scalar; + this.arg3Column = arg3Column; + this.outputColumn = outputColumn; + } + + public IfExprStringScalarStringGroupColumn() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + LongColumnVector arg1ColVector = (LongColumnVector) batch.cols[arg1Column]; + BytesColumnVector arg3ColVector = (BytesColumnVector) batch.cols[arg3Column]; + BytesColumnVector outputColVector = (BytesColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + boolean[] outputIsNull = outputColVector.isNull; + outputColVector.noNulls = arg3ColVector.noNulls; + outputColVector.isRepeating = false; // may override later + int n = batch.size; + long[] vector1 = arg1ColVector.vector; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + outputColVector.initBuffer(); + + /* All the code paths below propagate nulls even arg3 has no + * nulls. This is to reduce the number of code paths and shorten the + * code, at the expense of maybe doing unnecessary work if neither input + * has nulls. This could be improved in the future by expanding the number + * of code paths. 
+ */ + if (arg1ColVector.isRepeating) { + if (vector1[0] == 1) { + outputColVector.fill(arg2Scalar); + } else { + arg3ColVector.copySelected(batch.selectedInUse, sel, n, outputColVector); + } + return; + } + + // extend any repeating values and noNulls indicator in the input + arg3ColVector.flatten(batch.selectedInUse, sel, n); + + if (arg1ColVector.noNulls) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + if (!arg3ColVector.isNull[i]) { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + } + outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + if (vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + if (!arg3ColVector.isNull[i]) { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + } + outputIsNull[i] = (vector1[i] == 1 ? false : arg3ColVector.isNull[i]); + } + } + } else /* there are nulls */ { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + if (!arg3ColVector.isNull[i]) { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? 
+ false : arg3ColVector.isNull[i]); + } + } else { + for(int i = 0; i != n; i++) { + if (!arg1ColVector.isNull[i] && vector1[i] == 1) { + outputColVector.setRef(i, arg2Scalar, 0, arg2Scalar.length); + } else { + if (!arg3ColVector.isNull[i]) { + outputColVector.setVal( + i, arg3ColVector.vector[i], arg3ColVector.start[i], arg3ColVector.length[i]); + } + } + outputIsNull[i] = (!arg1ColVector.isNull[i] && vector1[i] == 1 ? + false : arg3ColVector.isNull[i]); + } + } + } + + // restore state of repeating and non nulls indicators + arg3ColVector.unFlatten(); + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "String"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.LONG, + VectorExpressionDescriptor.ArgumentType.STRING, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java index f6fcfea..6ca3af1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarStringScalar.java @@ -132,34 +132,6 @@ public String getOutputType() { return "String"; } - public int getArg1Column() { - return arg1Column; - } - - public void setArg1Column(int colNum) { - this.arg1Column = colNum; - } - - public byte[] getArg2Scalar() { - 
return arg2Scalar; - } - - public void setArg2Scalar(byte[] value) { - this.arg2Scalar = value; - } - - public byte[] getArg3Scalar() { - return arg3Scalar; - } - - public void setArg3Scalar(byte[] value) { - this.arg3Scalar = value; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - @Override public VectorExpressionDescriptor.Descriptor getDescriptor() { return (new VectorExpressionDescriptor.Builder()) @@ -167,9 +139,9 @@ public void setOutputColumn(int outputColumn) { VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(3) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.getType("long"), - VectorExpressionDescriptor.ArgumentType.getType("string"), - VectorExpressionDescriptor.ArgumentType.getType("string")) + VectorExpressionDescriptor.ArgumentType.LONG, + VectorExpressionDescriptor.ArgumentType.STRING, + VectorExpressionDescriptor.ArgumentType.STRING) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, VectorExpressionDescriptor.InputExpressionType.SCALAR, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarVarCharScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarVarCharScalar.java new file mode 100644 index 0000000..86324b9 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/IfExprStringScalarVarCharScalar.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.HiveVarchar; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Compute IF(expr1, expr2, expr3) for 3 input column expressions. + * The first is always a boolean (LongColumnVector). + * The second is a string scalar. + * The third is a string scalar. + */ +public class IfExprStringScalarVarCharScalar extends IfExprStringScalarStringScalar { + + private static final long serialVersionUID = 1L; + + public IfExprStringScalarVarCharScalar( + int arg1Column, byte[] arg2Scalar, HiveVarchar arg3Scalar, int outputColumn) { + super(arg1Column, arg2Scalar, arg3Scalar.getValue().getBytes(), outputColumn); + } + + public IfExprStringScalarVarCharScalar() { + } + + @Override + public String getOutputType() { + return "String"; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(3) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.LONG, + VectorExpressionDescriptor.ArgumentType.STRING, + VectorExpressionDescriptor.ArgumentType.VARCHAR) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git 
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector.expressions;

import org.apache.hadoop.hive.common.type.HiveVarchar;

import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;

/**
 * Compute IF(expr1, expr2, expr3) for 3 input expressions.
 * The first is always a boolean (LongColumnVector).
 * The second is a varchar scalar.
 * The third is a string-group column or non-constant expression result.
 *
 * The varchar scalar is converted to its underlying string bytes up front so
 * the inherited string-scalar/string-column implementation can be reused unchanged.
 */
public class IfExprVarCharScalarStringGroupColumn extends IfExprStringScalarStringGroupColumn {

  private static final long serialVersionUID = 1L;

  /**
   * @param arg1Column index of the boolean condition column
   * @param arg2Scalar varchar scalar returned when the condition is true
   * @param arg3Column index of the string-group column read when the condition is false
   * @param outputColumn index of the column that receives the result
   */
  public IfExprVarCharScalarStringGroupColumn(int arg1Column, HiveVarchar arg2Scalar, int arg3Column, int outputColumn) {
    // NOTE(review): getValue().getBytes() uses the platform default charset;
    // Hive string data is UTF-8 — consider getBytes(StandardCharsets.UTF_8). TODO confirm.
    super(arg1Column, arg2Scalar.getValue().getBytes(), arg3Column, outputColumn);
  }

  // No-argument constructor required for deserialization / reflective construction.
  public IfExprVarCharScalarStringGroupColumn() {
    super();
  }

  @Override
  public VectorExpressionDescriptor.Descriptor getDescriptor() {
    // Matches IF(boolean column, varchar scalar, string-group column) in projection mode.
    return (new VectorExpressionDescriptor.Builder())
        .setMode(
            VectorExpressionDescriptor.Mode.PROJECTION)
        .setNumArguments(3)
        .setArgumentTypes(
            VectorExpressionDescriptor.ArgumentType.LONG,
            VectorExpressionDescriptor.ArgumentType.VARCHAR,
            VectorExpressionDescriptor.ArgumentType.STRING_GROUP)
        .setInputExpressionTypes(
            VectorExpressionDescriptor.InputExpressionType.COLUMN,
            VectorExpressionDescriptor.InputExpressionType.SCALAR,
            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
  }
}
You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector.expressions;

import org.apache.hadoop.hive.common.type.HiveVarchar;

import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;

/**
 * Compute IF(expr1, expr2, expr3) for 3 input expressions.
 * The first is always a boolean (LongColumnVector).
 * The second is a varchar scalar.
 * The third is a string scalar.
 *
 * The varchar scalar is converted to its underlying string bytes up front so
 * the inherited string/string implementation can be reused unchanged.
 */
public class IfExprVarCharScalarStringScalar extends IfExprStringScalarStringScalar {

  private static final long serialVersionUID = 1L;

  /**
   * @param arg1Column index of the boolean condition column
   * @param arg2Scalar varchar scalar returned when the condition is true
   * @param arg3Scalar bytes of the string scalar returned when the condition is false
   * @param outputColumn index of the column that receives the result
   */
  public IfExprVarCharScalarStringScalar(
      int arg1Column, HiveVarchar arg2Scalar, byte[] arg3Scalar, int outputColumn) {
    // NOTE(review): getValue().getBytes() uses the platform default charset;
    // Hive string data is UTF-8 — consider getBytes(StandardCharsets.UTF_8). TODO confirm.
    super(arg1Column, arg2Scalar.getValue().getBytes(), arg3Scalar, outputColumn);
  }

  // No-argument constructor required for deserialization / reflective construction.
  public IfExprVarCharScalarStringScalar() {
  }

  @Override
  public String getOutputType() {
    // The result of mixing a varchar scalar and a string scalar is typed as string here.
    return "String";
  }

  @Override
  public VectorExpressionDescriptor.Descriptor getDescriptor() {
    // Matches IF(boolean column, varchar scalar, string scalar) in projection mode.
    return (new VectorExpressionDescriptor.Builder())
        .setMode(
            VectorExpressionDescriptor.Mode.PROJECTION)
        .setNumArguments(3)
        .setArgumentTypes(
            VectorExpressionDescriptor.ArgumentType.LONG,
            VectorExpressionDescriptor.ArgumentType.VARCHAR,
            VectorExpressionDescriptor.ArgumentType.STRING)
        .setInputExpressionTypes(
            VectorExpressionDescriptor.InputExpressionType.COLUMN,
            VectorExpressionDescriptor.InputExpressionType.SCALAR,
            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
  }
}
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColCol.java deleted file mode 100644 index e6c2c23..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColCol.java +++ /dev/null @@ -1,455 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; - -/** - * Vectorized instruction to concatenate two string columns and put - * the output in a third column. 
- */ -public class StringConcatColCol extends VectorExpression { - private static final long serialVersionUID = 1L; - private int colNum1; - private int colNum2; - private int outputColumn; - - public StringConcatColCol(int colNum1, int colNum2, int outputColumn) { - this(); - this.colNum1 = colNum1; - this.colNum2 = colNum2; - this.outputColumn = outputColumn; - } - - public StringConcatColCol() { - super(); - } - - @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - BytesColumnVector inV1 = (BytesColumnVector) batch.cols[colNum1]; - BytesColumnVector inV2 = (BytesColumnVector) batch.cols[colNum2]; - BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn]; - int[] sel = batch.selected; - int n = batch.size; - byte[][] vector1 = inV1.vector; - byte[][] vector2 = inV2.vector; - int[] len1 = inV1.length; - int[] len2 = inV2.length; - int[] start1 = inV1.start; - int[] start2 = inV2.start; - - // return immediately if batch is empty - if (n == 0) { - return; - } - - // prepare output buffer to accept results - outV.initBuffer(); - - /* Handle default case for isRepeating setting for output. This will be set to true - * later in the special cases where that is necessary. - */ - outV.isRepeating = false; - - if (inV1.noNulls && !inV2.noNulls) { - - // propagate nulls - - /* We'll assume that there *may* be nulls in the input if !noNulls is true - * for an input vector. This is to be more forgiving of errors in loading - * the vectors. A properly-written vectorized iterator will make sure that - * isNull[0] is set if !noNulls and isRepeating are true for the vector. 
- */ - outV.noNulls = false; - if (inV2.isRepeating) { - if (inV2.isNull[0]) { - - // Output will also be repeating and null - outV.isNull[0] = true; - outV.isRepeating = true; - - //return as no further processing is needed - return; - } - } else { - propagateNulls(batch.selectedInUse, n, sel, inV2, outV); - } - - // perform data operation - if (inV1.isRepeating && inV2.isRepeating) { - - /* All must be selected otherwise size would be zero. - * Repeating property will not change. - */ - if (!inV2.isNull[0]) { - outV.setConcat(0, vector1[0], start1[0], len1[0], vector2[0], start2[0], len2[0]); - } - outV.isRepeating = true; - } else if (inV1.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!inV2.isNull[i]) { - outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); - } - } - } else { - for(int i = 0; i != n; i++) { - if (!inV2.isNull[0]) { - outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); - } - } - } - } else if (inV2.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!inV2.isNull[i]) { - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]); - } - } - } else { - for(int i = 0; i != n; i++) { - if (!inV2.isNull[i]) { - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]); - } - } - } - } else { - if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - if (!inV2.isNull[i]) { - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]); - } - } - } else { - for(int i = 0; i != n; i++) { - if (!inV2.isNull[i]) { - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]); - } - } - } - } - } else if (!inV1.noNulls && inV2.noNulls) { - - // propagate nulls - outV.noNulls = false; - if (inV1.isRepeating) { - - //Output will also be repeating and null - outV.isRepeating = true; - 
outV.isNull[0] = true; - - //return as no further processing is needed - return; - } else { - propagateNulls(batch.selectedInUse, n, sel, inV1, outV); - } - - // perform data operation - if (inV1.isRepeating && inV2.isRepeating) { - //All must be selected otherwise size would be zero - //Repeating property will not change. - if (!inV1.isNull[0]) { - outV.setConcat(0, vector1[0], start1[0], len1[0], vector2[0], start2[0], len2[0]); - } - outV.isRepeating = true; - } else if (inV1.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!inV1.isNull[0]) { - outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); - } - } - } else { - for(int i = 0; i != n; i++) { - if (!inV1.isNull[0]) { - outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); - } - } - } - } else if (inV2.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!inV1.isNull[i]) { - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]); - } - } - } else { - for(int i = 0; i != n; i++) { - if (!inV1.isNull[i]) { - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]); - } - } - } - } else { - if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - if (!inV1.isNull[i]) { - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]); - } - } - } else { - for(int i = 0; i != n; i++) { - if (!inV1.isNull[i]) { - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]); - } - } - } - } - } else if (!inV1.noNulls && !inV2.noNulls) { - - // propagate nulls - outV.noNulls = false; - if (inV1.isRepeating && inV2.isRepeating) { - outV.isNull[0] = inV1.isNull[0] || inV2.isNull[0]; - - //Output will also be repeating - outV.isRepeating = true; - - // return if output is null because no additional work is needed - if (outV.isNull[0]) { - return; - } - } 
else if (inV1.isRepeating) { - if (inV1.isNull[0]) { // then all output will be null - outV.isRepeating = true; - outV.isNull[0] = true; - return; - } else { - outV.isRepeating = false; - propagateNulls(batch.selectedInUse, n, sel, inV2, outV); - } - } else if (inV2.isRepeating) { - if (inV2.isNull[0]) { - outV.isRepeating = true; - outV.isNull[0] = true; - return; - } else { - outV.isRepeating = false; - propagateNulls(batch.selectedInUse, n, sel, inV1, outV); - } - } else { - propagateNullsCombine(batch.selectedInUse, n, sel, inV1, inV2, outV); - } - - // perform data operation - if (inV1.isRepeating && inV2.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - if (!inV1.isNull[0] && !inV2.isNull[0]) { - outV.setConcat(0, vector1[0], start1[0], len1[0], vector2[0], start2[0], len2[0]); - } - outV.isRepeating = true; - } else if (inV1.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!inV1.isNull[0] && !inV2.isNull[i]) { - outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); - } - } - } else { - for(int i = 0; i != n; i++) { - if (!inV1.isNull[0] && !inV2.isNull[i]) { - outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); - } - } - } - } else if (inV2.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!inV1.isNull[i] && !inV2.isNull[0]) { - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]); - } - } - } else { - for(int i = 0; i != n; i++) { - if (!inV1.isNull[i] && !inV2.isNull[0]) { - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]); - } - } - } - } else { - if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - if (!inV1.isNull[i] && !inV2.isNull[i]) { - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]); - } - } - } else { - for(int 
i = 0; i != n; i++) { - if (!inV1.isNull[i] && !inV2.isNull[i]) { - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]); - } - } - } - } - } else { // there are no nulls in either input vector - - // propagate null information - outV.noNulls = true; - - // perform data operation - if (inV1.isRepeating && inV2.isRepeating) { - - // All must be selected otherwise size would be zero. Repeating property will not change. - outV.setConcat(0, vector1[0], start1[0], len1[0], vector2[0], start2[0], len2[0]); - outV.isRepeating = true; - } else if (inV1.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); - } - } else { - for(int i = 0; i != n; i++) { - outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); - } - } - } else if (inV2.isRepeating) { - if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]); - } - } else { - for(int i = 0; i != n; i++) { - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]); - } - } - } else { - if (batch.selectedInUse) { - for(int j=0; j != n; j++) { - int i = sel[j]; - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]); - } - } else { - for(int i = 0; i != n; i++) { - outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]); - } - } - } - } - } - - /** - * Propagate the logic OR of null vectors from two inputs to output. 
- * - * @param selectedInUse true/false flag to tell if sel[] is in use - * @param n number of qualifying rows - * @param sel selected value position array - * @param inV1 input vector 1 - * @param inV2 input vector 2 - * @param outV output vector - */ - private static void propagateNullsCombine(boolean selectedInUse, int n, int[] sel, - ColumnVector inV1, ColumnVector inV2, BytesColumnVector outV) { - if (selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outV.isNull[i] = inV1.isNull[i] || inV2.isNull[i]; - } - } else { - for(int i = 0; i != n; i++) { - outV.isNull[i] = inV1.isNull[i] || inV2.isNull[i]; - } - } - } - - /** - * Propagate nulls from input vector inV to output vector outV. - * - * @param selectedInUse true/false flag to tell if sel[] is in use - * @param sel selected value position array - * @param n number of qualifying rows - * @param inV input vector - * @param outV ouput vector - */ - private static void propagateNulls(boolean selectedInUse, int n, int[] sel, ColumnVector inV, - ColumnVector outV) { - if (selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - outV.isNull[i] = inV.isNull[i]; - } - } else { - System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); - } - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return "String"; - } - - public int getColNum1() { - return colNum1; - } - - public void setColNum1(int colNum1) { - this.colNum1 = colNum1; - } - - public int getColNum2() { - return colNum2; - } - - public void setColNum2(int colNum2) { - this.colNum2 = colNum2; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - return (new VectorExpressionDescriptor.Builder()) - .setMode( - VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(2) - .setArgumentTypes( - 
VectorExpressionDescriptor.ArgumentType.STRING, - VectorExpressionDescriptor.ArgumentType.STRING) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN, - VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java deleted file mode 100644 index 08b0eef..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringConcatColScalar.java +++ /dev/null @@ -1,164 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.expressions; - -import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; -import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; - -/** - * Vectorized instruction to concatenate a string column to a scalar and put - * the result in an output column. 
 */
public class StringConcatColScalar extends VectorExpression {
  private static final long serialVersionUID = 1L;
  // index of the string input column
  private int colNum;
  // index of the output column
  private int outputColumn;
  // scalar bytes appended after each input value
  private byte[] value;

  public StringConcatColScalar(int colNum, byte[] value, int outputColumn) {
    this();
    this.colNum = colNum;
    this.outputColumn = outputColumn;
    this.value = value;
  }

  // No-argument constructor required for deserialization / reflective construction.
  public StringConcatColScalar() {
    super();
  }

  /**
   * Compute CONCAT(column, scalar) for each row of the batch.
   * Null input rows produce null output rows; the concatenation is skipped
   * for them because calling the function can be expensive.
   */
  @Override
  public void evaluate(VectorizedRowBatch batch) {

    if (childExpressions != null) {
      super.evaluateChildren(batch);
    }

    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
    int[] sel = batch.selected;
    int n = batch.size;
    byte[][] vector = inputColVector.vector;
    int[] start = inputColVector.start;
    int[] length = inputColVector.length;

    if (n == 0) {

      // Nothing to do
      return;
    }

    // initialize output vector buffer to receive data
    outV.initBuffer();

    if (inputColVector.noNulls) {
      outV.noNulls = true;
      if (inputColVector.isRepeating) {
        // One concatenation covers the whole repeating batch.
        outV.isRepeating = true;
        outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length);
      } else if (batch.selectedInUse) {
        for(int j = 0; j != n; j++) {
          int i = sel[j];
          outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
        }
        outV.isRepeating = false;
      } else {
        for(int i = 0; i != n; i++) {
          outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
        }
        outV.isRepeating = false;
      }
    } else {

      /*
       * Handle case with nulls. Don't do function if the value is null, to save time,
       * because calling the function can be expensive.
       */
      outV.noNulls = false;
      if (inputColVector.isRepeating) {
        // Null flag of entry 0 carries for the whole repeating batch.
        outV.isRepeating = true;
        outV.isNull[0] = inputColVector.isNull[0];
        if (!inputColVector.isNull[0]) {
          outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length);
        }
      } else if (batch.selectedInUse) {
        for(int j = 0; j != n; j++) {
          int i = sel[j];
          if (!inputColVector.isNull[i]) {
            outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
          }
          // Propagate the input null flag even when the concat is skipped.
          outV.isNull[i] = inputColVector.isNull[i];
        }
        outV.isRepeating = false;
      } else {
        for(int i = 0; i != n; i++) {
          if (!inputColVector.isNull[i]) {
            outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length);
          }
          // Propagate the input null flag even when the concat is skipped.
          outV.isNull[i] = inputColVector.isNull[i];
        }
        outV.isRepeating = false;
      }
    }
  }

  @Override
  public int getOutputColumn() {
    return outputColumn;
  }

  @Override
  public String getOutputType() {
    return "String";
  }

  public int getColNum() {
    return colNum;
  }

  public void setColNum(int colNum) {
    this.colNum = colNum;
  }

  public byte[] getValue() {
    return value;
  }

  public void setValue(byte[] value) {
    this.value = value;
  }

  public void setOutputColumn(int outputColumn) {
    this.outputColumn = outputColumn;
  }

  @Override
  public VectorExpressionDescriptor.Descriptor getDescriptor() {
    // Accepts STRING column + STRING scalar in projection mode.
    return (new VectorExpressionDescriptor.Builder())
        .setMode(
            VectorExpressionDescriptor.Mode.PROJECTION)
        .setNumArguments(2)
        .setArgumentTypes(
            VectorExpressionDescriptor.ArgumentType.STRING,
            VectorExpressionDescriptor.ArgumentType.STRING)
        .setInputExpressionTypes(
            VectorExpressionDescriptor.InputExpressionType.COLUMN,
            VectorExpressionDescriptor.InputExpressionType.SCALAR).build();
  }
}
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector.expressions;

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

/**
 * Vectorized instruction to concatenate a scalar to a string column and put
 * the result in an output column. The scalar comes first: CONCAT(scalar, column).
 */
public class StringConcatScalarCol extends VectorExpression {
  private static final long serialVersionUID = 1L;
  // index of the string input column
  private int colNum;
  // index of the output column
  private int outputColumn;
  // scalar bytes prepended before each input value
  private byte[] value;

  public StringConcatScalarCol(byte[] value, int colNum, int outputColumn) {
    this();
    this.colNum = colNum;
    this.outputColumn = outputColumn;
    this.value = value;
  }

  // No-argument constructor required for deserialization / reflective construction.
  public StringConcatScalarCol() {
    super();
  }

  /**
   * Compute CONCAT(scalar, column) for each row of the batch.
   * Null input rows produce null output rows; the concatenation is skipped
   * for them because calling the function can be expensive.
   */
  @Override
  public void evaluate(VectorizedRowBatch batch) {

    if (childExpressions != null) {
      super.evaluateChildren(batch);
    }

    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
    int[] sel = batch.selected;
    int n = batch.size;
    byte[][] vector = inputColVector.vector;
    int[] start = inputColVector.start;
    int[] length = inputColVector.length;

    if (n == 0) {

      // Nothing to do
      return;
    }

    // initialize output vector buffer to receive data
    outV.initBuffer();

    if (inputColVector.noNulls) {
      outV.noNulls = true;
      if (inputColVector.isRepeating) {
        // One concatenation covers the whole repeating batch.
        outV.isRepeating = true;
        outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]);
      } else if (batch.selectedInUse) {
        for(int j = 0; j != n; j++) {
          int i = sel[j];
          outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
        }
        outV.isRepeating = false;
      } else {
        for(int i = 0; i != n; i++) {
          outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
        }
        outV.isRepeating = false;
      }
    } else {

      /*
       * Handle case with nulls. Don't do function if the value is null, to save time,
       * because calling the function can be expensive.
       */
      outV.noNulls = false;
      if (inputColVector.isRepeating) {
        // Null flag of entry 0 carries for the whole repeating batch.
        outV.isRepeating = true;
        outV.isNull[0] = inputColVector.isNull[0];
        if (!inputColVector.isNull[0]) {
          outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]);
        }
      } else if (batch.selectedInUse) {
        for(int j = 0; j != n; j++) {
          int i = sel[j];
          if (!inputColVector.isNull[i]) {
            outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
          }
          // Propagate the input null flag even when the concat is skipped.
          outV.isNull[i] = inputColVector.isNull[i];
        }
        outV.isRepeating = false;
      } else {
        for(int i = 0; i != n; i++) {
          if (!inputColVector.isNull[i]) {
            outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]);
          }
          // Propagate the input null flag even when the concat is skipped.
          outV.isNull[i] = inputColVector.isNull[i];
        }
        outV.isRepeating = false;
      }
    }
  }

  @Override
  public int getOutputColumn() {
    return outputColumn;
  }

  @Override
  public String getOutputType() {
    return "String";
  }

  public int getColNum() {
    return colNum;
  }

  public void setColNum(int colNum) {
    this.colNum = colNum;
  }

  public byte[] getValue() {
    return value;
  }

  public void setValue(byte[] value) {
    this.value = value;
  }

  public void setOutputColumn(int outputColumn) {
    this.outputColumn = outputColumn;
  }

  @Override
  public VectorExpressionDescriptor.Descriptor getDescriptor() {
    // Accepts STRING scalar + STRING column in projection mode.
    return (new VectorExpressionDescriptor.Builder())
        .setMode(
            VectorExpressionDescriptor.Mode.PROJECTION)
        .setNumArguments(2)
        .setArgumentTypes(
            VectorExpressionDescriptor.ArgumentType.STRING,
            VectorExpressionDescriptor.ArgumentType.STRING)
        .setInputExpressionTypes(
            VectorExpressionDescriptor.InputExpressionType.SCALAR,
            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
  }
}
  /**
   * Lexicographic comparison of two byte slices, byte values treated as unsigned.
   *
   * @return negative, zero, or positive as slice 1 sorts before, equal to,
   *         or after slice 2; a proper prefix sorts first.
   */
  public static int compare(byte[] arg1, int start1, int len1, byte[] arg2, int start2, int len2) {
    for (int i = 0; i < len1 && i < len2; i++) {
      // Note the "& 0xff" widens the signed byte to its unsigned integer value.
      int b1 = arg1[i + start1] & 0xff;
      int b2 = arg2[i + start2] & 0xff;
      if (b1 != b2) {
        return b1 - b2;
      }
    }
    return len1 - len2;
  }

  /** Count UTF-8 characters in the whole byte array (lead bytes only). */
  public static int characterCount(byte[] bytes) {
    int end = bytes.length;

    // count characters
    int j = 0;
    int charCount = 0;
    while(j < end) {
      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
      if ((bytes[j] & 0xc0) != 0x80) {
        ++charCount;
      }
      j++;
    }
    return charCount;
  }

  /** Count UTF-8 characters in a slice of a byte array (lead bytes only). */
  public static int characterCount(byte[] bytes, int start, int length) {
    int end = start + length;

    // count characters
    int j = start;
    int charCount = 0;
    while(j < end) {
      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
      if ((bytes[j] & 0xc0) != 0x80) {
        ++charCount;
      }
      j++;
    }
    return charCount;
  }

  // A setVal with the same function signature as rightTrim, leftTrim, truncate, etc, below.
  // Useful for class generation via templates.
  public static void assign(BytesColumnVector outV, int i, byte[] bytes, int start, int length) {
    // set output vector
    outV.setVal(i, bytes, start, length);
  }

  /*
   * Right trim a slice of a byte array and return the new byte length.
   * Only ASCII space (0x20) is treated as a pad character.
   */
  public static int rightTrim(byte[] bytes, int start, int length) {
    // skip trailing blank characters
    int j = start + length - 1;
    while(j >= start && bytes[j] == 0x20) {
      j--;
    }

    return (j - start) + 1;
  }

  /*
   * Right trim a slice of a byte array and place the result into element i of a vector.
   */
  public static void rightTrim(BytesColumnVector outV, int i, byte[] bytes, int start, int length) {
    // skip trailing blank characters
    int j = start + length - 1;
    while(j >= start && bytes[j] == 0x20) {
      j--;
    }

    // set output vector
    outV.setVal(i, bytes, start, (j - start) + 1);
  }

  /*
   * Truncate a slice of a byte array to a maximum number of characters and
   * return the new byte length. Truncation is on UTF-8 character boundaries,
   * never in the middle of a multi-byte character.
   */
  public static int truncate(byte[] bytes, int start, int length, int maxLength) {
    int end = start + length;

    // count characters forward
    int j = start;
    int charCount = 0;
    while(j < end) {
      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
      if ((bytes[j] & 0xc0) != 0x80) {
        if (charCount == maxLength) {
          break;
        }
        ++charCount;
      }
      j++;
    }
    return (j - start);
  }

  /*
   * Truncate a slice of a byte array to a maximum number of characters and
   * place the result into element i of a vector.
   */
  public static void truncate(BytesColumnVector outV, int i, byte[] bytes, int start, int length, int maxLength) {
    int end = start + length;

    // count characters forward
    int j = start;
    int charCount = 0;
    while(j < end) {
      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
      if ((bytes[j] & 0xc0) != 0x80) {
        if (charCount == maxLength) {
          break;
        }
        ++charCount;
      }
      j++;
    }

    // set output vector
    outV.setVal(i, bytes, start, (j - start));
  }

  /*
   * Truncate a byte array to a maximum number of characters and
   * return a byte array with only truncated bytes. Returns the input
   * array unchanged (no copy) when no truncation is needed.
   */
  public static byte[] truncateScalar(byte[] bytes, int maxLength) {
    int end = bytes.length;

    // count characters forward
    int j = 0;
    int charCount = 0;
    while(j < end) {
      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
      if ((bytes[j] & 0xc0) != 0x80) {
        if (charCount == maxLength) {
          break;
        }
        ++charCount;
      }
      j++;
    }
    if (j == end) {
      return bytes;
    } else {
      return Arrays.copyOf(bytes, j);
    }
  }

  /*
   * Right trim and truncate a slice of a byte array to a maximum number of characters and
   * return the new byte length. Equivalent to truncating to maxLength characters and then
   * right-trimming trailing 0x20 pads, done in a single forward scan: padRunStart tracks
   * the start of the current trailing run of pad bytes.
   */
  public static int rightTrimAndTruncate(byte[] bytes, int start, int length, int maxLength) {
    int end = start + length;

    // count characters forward and watch for final run of pads
    int j = start;
    int charCount = 0;
    int padRunStart = -1;
    while(j < end) {
      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
      if ((bytes[j] & 0xc0) != 0x80) {
        if (charCount == maxLength) {
          break;
        }
        if (bytes[j] == 0x20) {
          if (padRunStart == -1) {
            padRunStart = j;
          }
        } else {
          padRunStart = -1;
        }
        ++charCount;
      } else {
        // A continuation byte means the preceding lead byte was not a pad.
        padRunStart = -1;
      }
      j++;
    }
    if (padRunStart != -1) {
      return (padRunStart - start);
    } else {
      return (j - start);
    }
  }

  /*
   * Right trim and truncate a slice of a byte array to a maximum number of characters and
   * place the result into element i of a vector.
   */
  public static void rightTrimAndTruncate(BytesColumnVector outV, int i, byte[] bytes, int start, int length, int maxLength) {
    int end = start + length;

    // count characters forward and watch for final run of pads
    int j = start;
    int charCount = 0;
    int padRunStart = -1;
    while(j < end) {
      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
      if ((bytes[j] & 0xc0) != 0x80) {
        if (charCount == maxLength) {
          break;
        }
        if (bytes[j] == 0x20) {
          if (padRunStart == -1) {
            padRunStart = j;
          }
        } else {
          padRunStart = -1;
        }
        ++charCount;
      } else {
        // A continuation byte means the preceding lead byte was not a pad.
        padRunStart = -1;
      }
      j++;
    }
    // set output vector
    if (padRunStart != -1) {
      outV.setVal(i, bytes, start, (padRunStart - start));
    } else {
      outV.setVal(i, bytes, start, (j - start) );
    }
  }

  /*
   * Right trim and truncate a byte array to a maximum number of characters and
   * return a byte array with only the trimmed and truncated bytes. Returns the
   * input array unchanged (no copy) when no trimming or truncation is needed.
   */
  public static byte[] rightTrimAndTruncateScalar(byte[] bytes, int maxLength) {
    int end = bytes.length;

    // count characters forward and watch for final run of pads
    int j = 0;
    int charCount = 0;
    int padRunStart = -1;
    while(j < end) {
      // UTF-8 continuation bytes have 2 high bits equal to 0x80.
      if ((bytes[j] & 0xc0) != 0x80) {
        if (charCount == maxLength) {
          break;
        }
        if (bytes[j] == 0x20) {
          if (padRunStart == -1) {
            padRunStart = j;
          }
        } else {
          padRunStart = -1;
        }
        ++charCount;
      } else {
        // A continuation byte means the preceding lead byte was not a pad.
        padRunStart = -1;
      }
      j++;
    }
    if (padRunStart != -1) {
      return Arrays.copyOf(bytes, padRunStart);
    } else if (j == end) {
      return bytes;
    } else {
      return Arrays.copyOf(bytes, j);
    }
  }
}
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Vectorized instruction to concatenate a string column to a scalar and put + * the result in an output column. + */ +public class StringGroupColConcatCharScalar extends StringGroupColConcatStringScalar { + private static final long serialVersionUID = 1L; + + public StringGroupColConcatCharScalar(int colNum, HiveChar value, int outputColumn) { + super(colNum, value.getStrippedValue().getBytes(), outputColumn); + } + + public StringGroupColConcatCharScalar() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, + VectorExpressionDescriptor.ArgumentType.CHAR) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java new file mode 100644 
index 0000000..5b24cd6 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatStringScalar.java @@ -0,0 +1,164 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Vectorized instruction to concatenate a string column to a scalar and put + * the result in an output column. 
+ */ +public class StringGroupColConcatStringScalar extends VectorExpression { + private static final long serialVersionUID = 1L; + private int colNum; + private int outputColumn; + private byte[] value; + + public StringGroupColConcatStringScalar(int colNum, byte[] value, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + this.value = value; + } + + public StringGroupColConcatStringScalar() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; + BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + int n = batch.size; + byte[][] vector = inputColVector.vector; + int[] start = inputColVector.start; + int[] length = inputColVector.length; + + if (n == 0) { + + // Nothing to do + return; + } + + // initialize output vector buffer to receive data + outV.initBuffer(); + + if (inputColVector.noNulls) { + outV.noNulls = true; + if (inputColVector.isRepeating) { + outV.isRepeating = true; + outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); + } + outV.isRepeating = false; + } + } else { + + /* + * Handle case with nulls. Don't do function if the value is null, to save time, + * because calling the function can be expensive. 
+ */ + outV.noNulls = false; + if (inputColVector.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inputColVector.isNull[0]; + if (!inputColVector.isNull[0]) { + outV.setConcat(0, vector[0], start[0], length[0], value, 0, value.length); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!inputColVector.isNull[i]) { + outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); + } + outV.isNull[i] = inputColVector.isNull[i]; + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + if (!inputColVector.isNull[i]) { + outV.setConcat(i, vector[i], start[i], length[i], value, 0, value.length); + } + outV.isNull[i] = inputColVector.isNull[i]; + } + outV.isRepeating = false; + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "StringGroup"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public byte[] getValue() { + return value; + } + + public void setValue(byte[] value) { + this.value = value; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, + VectorExpressionDescriptor.ArgumentType.STRING) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java new file mode 100644 index 
0000000..0d53264 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupColConcatVarCharScalar.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Vectorized instruction to concatenate a string column to a scalar and put + * the result in an output column. 
+ */ +public class StringGroupColConcatVarCharScalar extends StringGroupColConcatStringScalar { + private static final long serialVersionUID = 1L; + + public StringGroupColConcatVarCharScalar(int colNum, HiveVarchar value, int outputColumn) { + super(colNum, value.getValue().getBytes(), outputColumn); + } + + public StringGroupColConcatVarCharScalar() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, + VectorExpressionDescriptor.ArgumentType.VARCHAR) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.SCALAR).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java new file mode 100644 index 0000000..3cb7af5 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringGroupConcatColCol.java @@ -0,0 +1,455 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Vectorized instruction to concatenate two string columns and put + * the output in a third column. + */ +public class StringGroupConcatColCol extends VectorExpression { + private static final long serialVersionUID = 1L; + private int colNum1; + private int colNum2; + private int outputColumn; + + public StringGroupConcatColCol(int colNum1, int colNum2, int outputColumn) { + this(); + this.colNum1 = colNum1; + this.colNum2 = colNum2; + this.outputColumn = outputColumn; + } + + public StringGroupConcatColCol() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inV1 = (BytesColumnVector) batch.cols[colNum1]; + BytesColumnVector inV2 = (BytesColumnVector) batch.cols[colNum2]; + BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + int n = batch.size; + byte[][] vector1 = inV1.vector; + byte[][] vector2 = inV2.vector; + int[] len1 = inV1.length; + int[] len2 = inV2.length; + int[] start1 = inV1.start; + int[] start2 = inV2.start; + + // return immediately if batch is empty + if (n == 0) { + return; + } + + // prepare output buffer to accept results + outV.initBuffer(); + + /* Handle default case for isRepeating setting for output. This will be set to true + * later in the special cases where that is necessary. 
+     */
+    outV.isRepeating = false;
+
+    if (inV1.noNulls && !inV2.noNulls) {
+
+      // propagate nulls
+
+      /* We'll assume that there *may* be nulls in the input if !noNulls is true
+       * for an input vector. This is to be more forgiving of errors in loading
+       * the vectors. A properly-written vectorized iterator will make sure that
+       * isNull[0] is set if !noNulls and isRepeating are true for the vector.
+       */
+      outV.noNulls = false;
+      if (inV2.isRepeating) { // NOTE(review): when the repeated value is non-null, outV.isNull is left stale while noNulls == false -- confirm downstream readers tolerate this
+        if (inV2.isNull[0]) {
+
+          // Output will also be repeating and null
+          outV.isNull[0] = true;
+          outV.isRepeating = true;
+
+          //return as no further processing is needed
+          return;
+        }
+      } else {
+        propagateNulls(batch.selectedInUse, n, sel, inV2, outV);
+      }
+
+      // perform data operation
+      if (inV1.isRepeating && inV2.isRepeating) {
+
+        /* All must be selected otherwise size would be zero.
+         * Repeating property will not change.
+         */
+        if (!inV2.isNull[0]) {
+          outV.setConcat(0, vector1[0], start1[0], len1[0], vector2[0], start2[0], len2[0]);
+        }
+        outV.isRepeating = true;
+      } else if (inV1.isRepeating) {
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!inV2.isNull[i]) {
+              outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]);
+            }
+          }
+        } else {
+          for(int i = 0; i != n; i++) {
+            if (!inV2.isNull[i]) { // BUG FIX: was isNull[0]; inV2 is not repeating here, so the null flag must be read per row
+              outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]);
+            }
+          }
+        }
+      } else if (inV2.isRepeating) {
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!inV2.isNull[0]) { // BUG FIX: was isNull[i]; inV2 is repeating, so only slot 0 of isNull is defined
+              outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]);
+            }
+          }
+        } else {
+          for(int i = 0; i != n; i++) {
+            if (!inV2.isNull[0]) { // BUG FIX: was isNull[i]; see above
+              outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]);
+            }
+          }
+        }
+      } else {
+        if (batch.selectedInUse) {
+          for(int j=0; j != n; j++) {
+            int i = sel[j];
+            if (!inV2.isNull[i]) {
+              outV.setConcat(i, vector1[i], start1[i], len1[i],
+                  vector2[i], start2[i], len2[i]);
+            }
+          }
+        } else {
+          for(int i = 0; i != n; i++) {
+            if (!inV2.isNull[i]) {
+              outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]);
+            }
+          }
+        }
+      }
+    } else if (!inV1.noNulls && inV2.noNulls) {
+
+      // propagate nulls
+      outV.noNulls = false;
+      if (inV1.isRepeating) {
+        // BUG FIX: mirror the inV2 branch above -- only short-circuit to all-null output when the repeated value is actually null
+        if (inV1.isNull[0]) {
+          // Output will also be repeating and null
+          outV.isRepeating = true;
+          outV.isNull[0] = true;
+          return;
+        }
+      } else {
+        propagateNulls(batch.selectedInUse, n, sel, inV1, outV);
+      }
+
+      // perform data operation
+      if (inV1.isRepeating && inV2.isRepeating) {
+        //All must be selected otherwise size would be zero
+        //Repeating property will not change.
+        if (!inV1.isNull[0]) {
+          outV.setConcat(0, vector1[0], start1[0], len1[0], vector2[0], start2[0], len2[0]);
+        }
+        outV.isRepeating = true;
+      } else if (inV1.isRepeating) {
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!inV1.isNull[0]) {
+              outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]);
+            }
+          }
+        } else {
+          for(int i = 0; i != n; i++) {
+            if (!inV1.isNull[0]) {
+              outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]);
+            }
+          }
+        }
+      } else if (inV2.isRepeating) {
+        if (batch.selectedInUse) {
+          for(int j = 0; j != n; j++) {
+            int i = sel[j];
+            if (!inV1.isNull[i]) {
+              outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]);
+            }
+          }
+        } else {
+          for(int i = 0; i != n; i++) {
+            if (!inV1.isNull[i]) {
+              outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]);
+            }
+          }
+        }
+      } else {
+        if (batch.selectedInUse) {
+          for(int j=0; j != n; j++) {
+            int i = sel[j];
+            if (!inV1.isNull[i]) {
+              outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]);
+            }
+          }
+        } else {
+          for(int i = 0; i != n; i++) {
+            if (!inV1.isNull[i]) {
+              outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i],
start2[i], len2[i]); + } + } + } + } + } else if (!inV1.noNulls && !inV2.noNulls) { + + // propagate nulls + outV.noNulls = false; + if (inV1.isRepeating && inV2.isRepeating) { + outV.isNull[0] = inV1.isNull[0] || inV2.isNull[0]; + + //Output will also be repeating + outV.isRepeating = true; + + // return if output is null because no additional work is needed + if (outV.isNull[0]) { + return; + } + } else if (inV1.isRepeating) { + if (inV1.isNull[0]) { // then all output will be null + outV.isRepeating = true; + outV.isNull[0] = true; + return; + } else { + outV.isRepeating = false; + propagateNulls(batch.selectedInUse, n, sel, inV2, outV); + } + } else if (inV2.isRepeating) { + if (inV2.isNull[0]) { + outV.isRepeating = true; + outV.isNull[0] = true; + return; + } else { + outV.isRepeating = false; + propagateNulls(batch.selectedInUse, n, sel, inV1, outV); + } + } else { + propagateNullsCombine(batch.selectedInUse, n, sel, inV1, inV2, outV); + } + + // perform data operation + if (inV1.isRepeating && inV2.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. 
+ if (!inV1.isNull[0] && !inV2.isNull[0]) { + outV.setConcat(0, vector1[0], start1[0], len1[0], vector2[0], start2[0], len2[0]); + } + outV.isRepeating = true; + } else if (inV1.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!inV1.isNull[0] && !inV2.isNull[i]) { + outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); + } + } + } else { + for(int i = 0; i != n; i++) { + if (!inV1.isNull[0] && !inV2.isNull[i]) { + outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); + } + } + } + } else if (inV2.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!inV1.isNull[i] && !inV2.isNull[0]) { + outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]); + } + } + } else { + for(int i = 0; i != n; i++) { + if (!inV1.isNull[i] && !inV2.isNull[0]) { + outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]); + } + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + if (!inV1.isNull[i] && !inV2.isNull[i]) { + outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]); + } + } + } else { + for(int i = 0; i != n; i++) { + if (!inV1.isNull[i] && !inV2.isNull[i]) { + outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]); + } + } + } + } + } else { // there are no nulls in either input vector + + // propagate null information + outV.noNulls = true; + + // perform data operation + if (inV1.isRepeating && inV2.isRepeating) { + + // All must be selected otherwise size would be zero. Repeating property will not change. 
+ outV.setConcat(0, vector1[0], start1[0], len1[0], vector2[0], start2[0], len2[0]); + outV.isRepeating = true; + } else if (inV1.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); + } + } else { + for(int i = 0; i != n; i++) { + outV.setConcat(i, vector1[0], start1[0], len1[0], vector2[i], start2[i], len2[i]); + } + } + } else if (inV2.isRepeating) { + if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]); + } + } else { + for(int i = 0; i != n; i++) { + outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[0], start2[0], len2[0]); + } + } + } else { + if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]); + } + } else { + for(int i = 0; i != n; i++) { + outV.setConcat(i, vector1[i], start1[i], len1[i], vector2[i], start2[i], len2[i]); + } + } + } + } + } + + /** + * Propagate the logic OR of null vectors from two inputs to output. + * + * @param selectedInUse true/false flag to tell if sel[] is in use + * @param n number of qualifying rows + * @param sel selected value position array + * @param inV1 input vector 1 + * @param inV2 input vector 2 + * @param outV output vector + */ + private static void propagateNullsCombine(boolean selectedInUse, int n, int[] sel, + ColumnVector inV1, ColumnVector inV2, BytesColumnVector outV) { + if (selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV1.isNull[i] || inV2.isNull[i]; + } + } else { + for(int i = 0; i != n; i++) { + outV.isNull[i] = inV1.isNull[i] || inV2.isNull[i]; + } + } + } + + /** + * Propagate nulls from input vector inV to output vector outV. 
+ * + * @param selectedInUse true/false flag to tell if sel[] is in use + * @param sel selected value position array + * @param n number of qualifying rows + * @param inV input vector + * @param outV ouput vector + */ + private static void propagateNulls(boolean selectedInUse, int n, int[] sel, ColumnVector inV, + ColumnVector outV) { + if (selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + } + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "StringGroup"; + } + + public int getColNum1() { + return colNum1; + } + + public void setColNum1(int colNum1) { + this.colNum1 = colNum1; + } + + public int getColNum2() { + return colNum2; + } + + public void setColNum2(int colNum2) { + this.colNum2 = colNum2; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java index b1b915e..edecf79 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java @@ -162,7 +162,7 @@ public void setOutputColumn(int outputColumn) { 
b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(1) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.STRING) + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java new file mode 100644 index 0000000..0118f79 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringScalarConcatStringGroupCol.java @@ -0,0 +1,164 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Vectorized instruction to concatenate a scalar to a string column and put + * the result in an output column. 
+ */ +public class StringScalarConcatStringGroupCol extends VectorExpression { + private static final long serialVersionUID = 1L; + private int colNum; + private int outputColumn; + private byte[] value; + + public StringScalarConcatStringGroupCol(byte[] value, int colNum, int outputColumn) { + this(); + this.colNum = colNum; + this.outputColumn = outputColumn; + this.value = value; + } + + public StringScalarConcatStringGroupCol() { + super(); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; + BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn]; + int[] sel = batch.selected; + int n = batch.size; + byte[][] vector = inputColVector.vector; + int[] start = inputColVector.start; + int[] length = inputColVector.length; + + if (n == 0) { + + // Nothing to do + return; + } + + // initialize output vector buffer to receive data + outV.initBuffer(); + + if (inputColVector.noNulls) { + outV.noNulls = true; + if (inputColVector.isRepeating) { + outV.isRepeating = true; + outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]); + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); + } + outV.isRepeating = false; + } + } else { + + /* + * Handle case with nulls. Don't do function if the value is null, to save time, + * because calling the function can be expensive. 
+ */ + outV.noNulls = false; + if (inputColVector.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inputColVector.isNull[0]; + if (!inputColVector.isNull[0]) { + outV.setConcat(0, value, 0, value.length, vector[0], start[0], length[0]); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + if (!inputColVector.isNull[i]) { + outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); + } + outV.isNull[i] = inputColVector.isNull[i]; + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + if (!inputColVector.isNull[i]) { + outV.setConcat(i, value, 0, value.length, vector[i], start[i], length[i]); + } + outV.isNull[i] = inputColVector.isNull[i]; + } + outV.isRepeating = false; + } + } + } + + @Override + public int getOutputColumn() { + return outputColumn; + } + + @Override + public String getOutputType() { + return "StringGroup"; + } + + public int getColNum() { + return colNum; + } + + public void setColNum(int colNum) { + this.colNum = colNum; + } + + public byte[] getValue() { + return value; + } + + public void setValue(byte[] value) { + this.value = value; + } + + public void setOutputColumn(int outputColumn) { + this.outputColumn = outputColumn; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java index 057d4a3..75796b3 100644 --- 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStart.java @@ -221,7 +221,7 @@ public int getOutputColumn() { @Override public String getOutputType() { - return "string"; + return "StringGroup"; } public int getStartIdx() { @@ -250,7 +250,7 @@ public void setOutputColumn(int outputColumn) { b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(2) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.STRING, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, VectorExpressionDescriptor.ArgumentType.LONG) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java index 477dc67..ae596ba 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringSubstrColStartLen.java @@ -240,7 +240,7 @@ public int getOutputColumn() { @Override public String getOutputType() { - return "string"; + return "StringGroup"; } public int getStartIdx() { @@ -277,7 +277,7 @@ public void setOutputColumn(int outputColumn) { b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(3) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.STRING, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP, VectorExpressionDescriptor.ArgumentType.LONG, VectorExpressionDescriptor.ArgumentType.LONG) .setInputExpressionTypes( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java index 7985ca0..46f17db 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDF.java @@ -200,7 +200,7 @@ public void setOutputColumn(int outputColumn) { b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(1) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.STRING) + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java index 8050b59..7d11edb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringUnaryUDFDirect.java @@ -142,7 +142,7 @@ public String getOutputType() { b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(1) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.STRING) + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TruncStringOutput.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TruncStringOutput.java new file mode 100644 index 0000000..7cf1683 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/TruncStringOutput.java @@ -0,0 +1,25 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +public interface TruncStringOutput { + abstract public int getMaxLength(); + + abstract public void setMaxLength(int maxLength); +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java new file mode 100644 index 0000000..7fe960e --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VarCharScalarConcatStringGroupCol.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; + +/** + * Vectorized instruction to concatenate a scalar to a string column and put + * the result in an output column. + */ +public class VarCharScalarConcatStringGroupCol extends StringScalarConcatStringGroupCol { + private static final long serialVersionUID = 1L; + + public VarCharScalarConcatStringGroupCol(HiveVarchar value, int colNum, int outputColumn) { + super(value.getValue().getBytes(), colNum, outputColumn); + } + + public VarCharScalarConcatStringGroupCol() { + super(); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()) + .setMode( + VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(2) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.VARCHAR, + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.SCALAR, + VectorExpressionDescriptor.InputExpressionType.COLUMN).build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java index 726413c..5995eac 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java @@ -30,16 +30,19 @@ */ public abstract class VectorExpression implements Serializable { public enum Type { - STRING, TIMESTAMP, DATE, OTHER; + STRING, CHAR, VARCHAR, TIMESTAMP, DATE, OTHER; private static Map types = ImmutableMap.builder() .put("string", STRING) + .put("char", CHAR) + .put("varchar", VARCHAR) .put("timestamp", TIMESTAMP) .put("date", DATE) .build(); public static Type getValue(String name) { - if 
(types.containsKey(name.toLowerCase())) { - return types.get(name); + String nameLower = name.toLowerCase(); + if (types.containsKey(nameLower)) { + return types.get(nameLower); } return OTHER; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java index c2bc012..a67cca3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpressionWriterFactory.java @@ -28,6 +28,7 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.common.type.Decimal128; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.vector.*; @@ -46,6 +47,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableIntObjectInspector; @@ -403,6 +405,9 @@ public static VectorExpressionWriter genVectorExpressionWritable( case STRING: return genVectorExpressionWritableString( (SettableStringObjectInspector) fieldObjInspector); + case CHAR: + return genVectorExpressionWritableChar( + (SettableHiveCharObjectInspector) fieldObjInspector); case 
VARCHAR: return genVectorExpressionWritableVarchar( (SettableHiveVarcharObjectInspector) fieldObjInspector); @@ -558,6 +563,46 @@ public Object initValue(Object ignored) { }.init(fieldObjInspector); } + private static VectorExpressionWriter genVectorExpressionWritableChar( + SettableHiveCharObjectInspector fieldObjInspector) throws HiveException { + return new VectorExpressionWriterBytes() { + private Object obj; + private Text text; + + public VectorExpressionWriter init(SettableHiveCharObjectInspector objInspector) + throws HiveException { + super.init(objInspector); + this.text = new Text(); + this.obj = initValue(null); + return this; + } + + @Override + public Object writeValue(byte[] value, int start, int length) throws HiveException { + text.set(value, start, length); + ((SettableHiveCharObjectInspector) this.objectInspector).set(this.obj, text.toString()); + return this.obj; + } + + @Override + public Object setValue(Object field, byte[] value, int start, int length) + throws HiveException { + if (null == field) { + field = initValue(null); + } + text.set(value, start, length); + ((SettableHiveCharObjectInspector) this.objectInspector).set(field, text.toString()); + return field; + } + + @Override + public Object initValue(Object ignored) { + return ((SettableHiveCharObjectInspector) this.objectInspector) + .create(new HiveChar(StringUtils.EMPTY, -1)); + } + }.init(fieldObjInspector); + } + private static VectorExpressionWriter genVectorExpressionWritableVarchar( SettableHiveVarcharObjectInspector fieldObjInspector) throws HiveException { return new VectorExpressionWriterBytes() { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java index cfeef5a..c0a1101 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColCol.java @@ -127,6 +127,8 @@ public void evaluate(VectorizedRowBatch batch) { break; case STRING: + case CHAR: + case VARCHAR: // Now disregard null in second pass. if ((inputColVector1.isRepeating) && (inputColVector2.isRepeating)) { // All must be selected otherwise size would be zero @@ -144,6 +146,8 @@ public void evaluate(VectorizedRowBatch batch) { } } break; + default: + throw new Error("Unsupported input type " + inputTypes[0].name()); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java index 891383d..c8abb21 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddColScalar.java @@ -165,6 +165,8 @@ public void evaluate(VectorizedRowBatch batch) { break; case STRING: + case CHAR: + case VARCHAR: if (inputCol.noNulls) { outV.noNulls = true; if (batch.selectedInUse) { @@ -199,6 +201,8 @@ public void evaluate(VectorizedRowBatch batch) { } } break; + default: + throw new Error("Unsupported input type " + inputTypes[0].name()); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java index 964c943..51223c1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateAddScalarCol.java @@ -84,6 +84,8 @@ public void evaluate(VectorizedRowBatch batch) { break; case STRING: + case CHAR: + case VARCHAR: try { baseDate = formatter.parse(new String(stringValue, "UTF-8")); break; @@ -101,6 +103,8 @@ public void evaluate(VectorizedRowBatch batch) { } return; 
} + default: + throw new Error("Unsupported input type " + inputTypes[0].name()); } if(batch.size == 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java index 44ee23a..8818213 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColCol.java @@ -191,12 +191,14 @@ private LongColumnVector toDateArray(VectorizedRowBatch batch, Type colType, return dateVector; case STRING: + case CHAR: + case VARCHAR: BytesColumnVector bcv = (BytesColumnVector) inputColVector; copySelected(bcv, batch.selectedInUse, batch.selected, batch.size, dateVector); return dateVector; + default: + throw new Error("Unsupported input type " + colType.name()); } - - return null; } // Copy the current object contents into the output. Only copy selected entries, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java index 5bf3b66..7dedf1a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffColScalar.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; +import org.apache.hadoop.hive.metastore.parser.ExpressionTree.Operator; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; @@ -90,6 +91,8 @@ public void evaluate(VectorizedRowBatch batch) { break; case STRING: + case CHAR: + case VARCHAR: try { date.setTime(formatter.parse(new String(stringValue, "UTF-8")).getTime()); baseDate = 
DateWritable.dateToDays(date); @@ -108,6 +111,8 @@ public void evaluate(VectorizedRowBatch batch) { } return; } + default: + throw new Error("Invalid input type #1: " + inputTypes[1].name()); } switch (inputTypes[0]) { @@ -184,6 +189,8 @@ public void evaluate(VectorizedRowBatch batch) { break; case STRING: + case CHAR: + case VARCHAR: if (inputCol.noNulls) { outV.noNulls = true; if (batch.selectedInUse) { @@ -218,6 +225,8 @@ public void evaluate(VectorizedRowBatch batch) { } } break; + default: + throw new Error("Invalid input type #0: " + inputTypes[0].name()); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java index 7099c79..ab389bb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateDiffScalarCol.java @@ -90,6 +90,8 @@ public void evaluate(VectorizedRowBatch batch) { break; case STRING: + case CHAR: + case VARCHAR: try { date.setTime(formatter.parse(new String(stringValue, "UTF-8")).getTime()); baseDate = DateWritable.dateToDays(date); @@ -108,6 +110,8 @@ public void evaluate(VectorizedRowBatch batch) { } return; } + default: + throw new Error("Unsupported input type " + inputTypes[0].name()); } switch (inputTypes[1]) { @@ -184,6 +188,8 @@ public void evaluate(VectorizedRowBatch batch) { break; case STRING: + case CHAR: + case VARCHAR: if (inputCol.noNulls) { outV.noNulls = true; if (batch.selectedInUse) { @@ -218,6 +224,8 @@ public void evaluate(VectorizedRowBatch batch) { } } break; + default: + throw new Error("Unsupported input type " + inputTypes[1].name()); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java index 
2a8c6e9..d1fd674 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFTimestampFieldString.java @@ -181,7 +181,7 @@ public void setOutputColumn(int outputColumn) { b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) .setNumArguments(1) .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.STRING) + VectorExpressionDescriptor.ArgumentType.STRING_GROUP) .setInputExpressionTypes( VectorExpressionDescriptor.InputExpressionType.COLUMN); return b.build(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java index fb91b9f..3832e7b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.vector.*; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; @@ -30,8 +31,12 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.*; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import 
org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.Text; /** @@ -215,6 +220,35 @@ private void setOutputCol(ColumnVector colVec, int i, Object value) { t = ((WritableStringObjectInspector) outputOI).getPrimitiveWritableObject(value); } bv.setVal(i, t.getBytes(), 0, t.getLength()); + } else if (outputOI instanceof WritableHiveCharObjectInspector) { + WritableHiveCharObjectInspector writableHiveCharObjectOI = (WritableHiveCharObjectInspector) outputOI; + int maxLength = ((CharTypeInfo) writableHiveCharObjectOI.getTypeInfo()).getLength(); + BytesColumnVector bv = (BytesColumnVector) colVec; + + HiveCharWritable hiveCharWritable; + if (value instanceof HiveCharWritable) { + hiveCharWritable = ((HiveCharWritable) value); + } else { + hiveCharWritable = writableHiveCharObjectOI.getPrimitiveWritableObject(value); + } + Text t = hiveCharWritable.getTextValue(); + + // In vector mode, we stored CHAR as unpadded. + StringExpr.rightTrimAndTruncate(bv, i, t.getBytes(), 0, t.getLength(), maxLength); + } else if (outputOI instanceof WritableHiveVarcharObjectInspector) { + WritableHiveVarcharObjectInspector writableHiveVarcharObjectOI = (WritableHiveVarcharObjectInspector) outputOI; + int maxLength = ((VarcharTypeInfo) writableHiveVarcharObjectOI.getTypeInfo()).getLength(); + BytesColumnVector bv = (BytesColumnVector) colVec; + + HiveVarcharWritable hiveVarcharWritable; + if (value instanceof HiveVarcharWritable) { + hiveVarcharWritable = ((HiveVarcharWritable) value); + } else { + hiveVarcharWritable = writableHiveVarcharObjectOI.getPrimitiveWritableObject(value); + } + Text t = hiveVarcharWritable.getTextValue(); + + StringExpr.truncate(bv, i, t.getBytes(), 0, t.getLength(), maxLength); } else if (outputOI instanceof WritableIntObjectInspector) { LongColumnVector lv = (LongColumnVector) colVec; if (value instanceof Integer) { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java 
ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index 8bf1632..37fead1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue; @@ -908,10 +909,10 @@ void skipRows(long items) throws IOException { } private static class BinaryTreeReader extends TreeReader{ - private InStream stream; - private IntegerReader lengths = null; + protected InStream stream; + protected IntegerReader lengths = null; - private final LongColumnVector scratchlcv; + protected final LongColumnVector scratchlcv; BinaryTreeReader(Path path, int columnId, Configuration conf) { super(path, columnId, conf); @@ -983,7 +984,7 @@ Object nextVector(Object previousVector, long batchSize) throws IOException { // Read present/isNull stream super.nextVector(result, batchSize); - BytesColumnVectorUtil.setRefToOrcByteArrays(stream, lengths, scratchlcv, result, batchSize); + BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv, result, batchSize); return result; } @@ -1376,12 +1377,13 @@ void skipRows(long items) throws IOException { } } + // This class collects together very similar methods for reading an ORC vector of byte arrays and + // creating the BytesColumnVector. + // private static class BytesColumnVectorUtil { - // This method has the common code for reading in bytes into a BytesColumnVector. - // It is used by the BINARY, STRING, CHAR, VARCHAR types. 
- public static void setRefToOrcByteArrays(InStream stream, IntegerReader lengths, LongColumnVector scratchlcv, - BytesColumnVector result, long batchSize) throws IOException { + private static byte[] commonReadByteArrays(InStream stream, IntegerReader lengths, LongColumnVector scratchlcv, + BytesColumnVector result, long batchSize) throws IOException { // Read lengths scratchlcv.isNull = result.isNull; // Notice we are replacing the isNull vector here... lengths.nextVector(scratchlcv, batchSize); @@ -1409,11 +1411,20 @@ public static void setRefToOrcByteArrays(InStream stream, IntegerReader lengths, } len -= bytesRead; offset += bytesRead; - } + } + + return allBytes; + } + + // This method has the common code for reading in bytes into a BytesColumnVector. + public static void readOrcByteArrays(InStream stream, IntegerReader lengths, LongColumnVector scratchlcv, + BytesColumnVector result, long batchSize) throws IOException { + + byte[] allBytes = commonReadByteArrays(stream, lengths, scratchlcv, result, batchSize); // Too expensive to figure out 'repeating' by comparisons. result.isRepeating = false; - offset = 0; + int offset = 0; if (!scratchlcv.isRepeating) { for (int i = 0; i < batchSize; i++) { if (!scratchlcv.isNull[i]) { @@ -1518,7 +1529,7 @@ Object nextVector(Object previousVector, long batchSize) throws IOException { // Read present/isNull stream super.nextVector(result, batchSize); - BytesColumnVectorUtil.setRefToOrcByteArrays(stream, lengths, scratchlcv, result, batchSize); + BytesColumnVectorUtil.readOrcByteArrays(stream, lengths, scratchlcv, result, batchSize); return result; } @@ -1734,6 +1745,42 @@ Object next(Object previous) throws IOException { result.enforceMaxLength(maxLength); return result; } + + @Override + Object nextVector(Object previousVector, long batchSize) throws IOException { + // Get the vector of strings from StringTreeReader, then make a 2nd pass to + // adjust down the length (right trim and truncate) if necessary. 
+ BytesColumnVector result = (BytesColumnVector) super.nextVector(previousVector, batchSize); + + int adjustedDownLen; + if (result.isRepeating) { + if (result.noNulls || !result.isNull[0]) { + adjustedDownLen = StringExpr.rightTrimAndTruncate(result.vector[0], result.start[0], result.length[0], maxLength); + if (adjustedDownLen < result.length[0]) { + result.setRef(0, result.vector[0], result.start[0], adjustedDownLen); + } + } + } else { + if (result.noNulls){ + for (int i = 0; i < batchSize; i++) { + adjustedDownLen = StringExpr.rightTrimAndTruncate(result.vector[i], result.start[i], result.length[i], maxLength); + if (adjustedDownLen < result.length[i]) { + result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); + } + } + } else { + for (int i = 0; i < batchSize; i++) { + if (!result.isNull[i]) { + adjustedDownLen = StringExpr.rightTrimAndTruncate(result.vector[i], result.start[i], result.length[i], maxLength); + if (adjustedDownLen < result.length[i]) { + result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); + } + } + } + } + } + return result; + } } private static class VarcharTreeReader extends StringTreeReader { @@ -1762,6 +1809,42 @@ Object next(Object previous) throws IOException { result.enforceMaxLength(maxLength); return result; } + + @Override + Object nextVector(Object previousVector, long batchSize) throws IOException { + // Get the vector of strings from StringTreeReader, then make a 2nd pass to + // adjust down the length (truncate) if necessary. 
+ BytesColumnVector result = (BytesColumnVector) super.nextVector(previousVector, batchSize); + + int adjustedDownLen; + if (result.isRepeating) { + if (result.noNulls || !result.isNull[0]) { + adjustedDownLen = StringExpr.truncate(result.vector[0], result.start[0], result.length[0], maxLength); + if (adjustedDownLen < result.length[0]) { + result.setRef(0, result.vector[0], result.start[0], adjustedDownLen); + } + } + } else { + if (result.noNulls){ + for (int i = 0; i < batchSize; i++) { + adjustedDownLen = StringExpr.truncate(result.vector[i], result.start[i], result.length[i], maxLength); + if (adjustedDownLen < result.length[i]) { + result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); + } + } + } else { + for (int i = 0; i < batchSize; i++) { + if (!result.isNull[i]) { + adjustedDownLen = StringExpr.truncate(result.vector[i], result.start[i], result.length[i], maxLength); + if (adjustedDownLen < result.length[i]) { + result.setRef(i, result.vector[i], result.start[i], adjustedDownLen); + } + } + } + } + } + return result; + } } private static class StructTreeReader extends TreeReader { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 05b7f48..2795a41 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -156,6 +156,10 @@ public Vectorizer() { // The regex matches only the "decimal" prefix of the type. patternBuilder.append("|decimal.*"); + // CHAR and VARCHAR types can be specified with maximum length. 
+ patternBuilder.append("|char.*"); + patternBuilder.append("|varchar.*"); + supportedDataTypesPattern = Pattern.compile(patternBuilder.toString()); supportedGenericUDFs.add(GenericUDFOPPlus.class); @@ -248,6 +252,8 @@ public Vectorizer() { supportedGenericUDFs.add(GenericUDFTimestamp.class); supportedGenericUDFs.add(GenericUDFToDecimal.class); supportedGenericUDFs.add(GenericUDFToDate.class); + supportedGenericUDFs.add(GenericUDFToChar.class); + supportedGenericUDFs.add(GenericUDFToVarchar.class); // For conditional expressions supportedGenericUDFs.add(GenericUDFIf.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java index d4d7e7c..fac13ba 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java @@ -23,9 +23,13 @@ import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColCol; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatColScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringConcatScalarCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupConcatColCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupColConcatVarCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringScalarConcatStringGroupCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CharScalarConcatStringGroupCol; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VarCharScalarConcatStringGroupCol; import 
org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; @@ -49,8 +53,11 @@ + "Example:\n" + " > SELECT _FUNC_('abc', 'def') FROM src LIMIT 1;\n" + " 'abcdef'") -@VectorizedExpressions({StringConcatColCol.class, StringConcatColScalar.class, - StringConcatScalarCol.class}) +@VectorizedExpressions({StringGroupConcatColCol.class, + StringGroupColConcatStringScalar.class, + StringGroupColConcatCharScalar.class, StringGroupColConcatVarCharScalar.class, + StringScalarConcatStringGroupCol.class, + CharScalarConcatStringGroupCol.class, VarCharScalarConcatStringGroupCol.class}) public class GenericUDFConcat extends GenericUDF { private transient ObjectInspector[] argumentOIs; private transient StringConverter[] stringConverters; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java index adf55c8..6ca6286 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java @@ -41,10 +41,20 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarLongScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprLongScalarDoubleScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringScalar; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnVarCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprCharScalarStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarVarCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprCharScalarStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStringScalar; + + /** * IF(expr1,expr2,expr3)
@@ -60,8 +70,14 @@ IfExprLongScalarDoubleColumn.class, IfExprDoubleScalarLongColumn.class, IfExprLongScalarLongScalar.class, IfExprDoubleScalarDoubleScalar.class, IfExprLongScalarDoubleScalar.class, IfExprDoubleScalarLongScalar.class, - IfExprStringColumnStringColumn.class, IfExprStringColumnStringScalar.class, - IfExprStringScalarStringColumn.class, IfExprStringScalarStringScalar.class + IfExprStringGroupColumnStringGroupColumn.class, + IfExprStringGroupColumnStringScalar.class, + IfExprStringGroupColumnCharScalar.class, IfExprStringGroupColumnVarCharScalar.class, + IfExprStringScalarStringGroupColumn.class, + IfExprCharScalarStringGroupColumn.class, IfExprVarCharScalarStringGroupColumn.class, + IfExprStringScalarStringScalar.class, + IfExprStringScalarCharScalar.class, IfExprStringScalarVarCharScalar.class, + IfExprCharScalarStringScalar.class, IfExprVarCharScalarStringScalar.class, }) public class GenericUDFIf extends GenericUDF { private transient ObjectInspector[] argumentOIs; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqual.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqual.java index cf104d3..bf00d71 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqual.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqual.java @@ -34,9 +34,14 @@ DoubleColEqualLongScalar.class, DoubleColEqualDoubleScalar.class, LongScalarEqualLongColumn.class, LongScalarEqualDoubleColumn.class, DoubleScalarEqualLongColumn.class, DoubleScalarEqualDoubleColumn.class, - StringColEqualStringColumn.class, StringColEqualStringScalar.class, - StringScalarEqualStringColumn.class, FilterStringColEqualStringColumn.class, - FilterStringColEqualStringScalar.class, FilterStringScalarEqualStringColumn.class, + StringGroupColEqualStringGroupColumn.class, FilterStringGroupColEqualStringGroupColumn.class, + StringGroupColEqualStringScalar.class, + StringGroupColEqualVarCharScalar.class, 
StringGroupColEqualCharScalar.class, + StringScalarEqualStringGroupColumn.class, + VarCharScalarEqualStringGroupColumn.class, CharScalarEqualStringGroupColumn.class, + FilterStringGroupColEqualStringScalar.class, FilterStringScalarEqualStringGroupColumn.class, + FilterStringGroupColEqualVarCharScalar.class, FilterVarCharScalarEqualStringGroupColumn.class, + FilterStringGroupColEqualCharScalar.class, FilterCharScalarEqualStringGroupColumn.class, FilterLongColEqualLongColumn.class, FilterLongColEqualDoubleColumn.class, FilterDoubleColEqualLongColumn.class, FilterDoubleColEqualDoubleColumn.class, FilterLongColEqualLongScalar.class, FilterLongColEqualDoubleScalar.class, diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java index 9f8de39..fd612d0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrGreaterThan.java @@ -35,9 +35,14 @@ DoubleColGreaterEqualLongScalar.class, DoubleColGreaterEqualDoubleScalar.class, LongScalarGreaterEqualLongColumn.class, LongScalarGreaterEqualDoubleColumn.class, DoubleScalarGreaterEqualLongColumn.class, DoubleScalarGreaterEqualDoubleColumn.class, - StringColGreaterEqualStringColumn.class, StringColGreaterEqualStringScalar.class, - StringScalarGreaterEqualStringColumn.class, FilterStringColGreaterEqualStringColumn.class, - FilterStringColGreaterEqualStringScalar.class, FilterStringScalarGreaterEqualStringColumn.class, + StringGroupColGreaterEqualStringGroupColumn.class, FilterStringGroupColGreaterEqualStringGroupColumn.class, + StringGroupColGreaterEqualStringScalar.class, + StringGroupColGreaterEqualVarCharScalar.class, StringGroupColGreaterEqualCharScalar.class, + StringScalarGreaterEqualStringGroupColumn.class, + VarCharScalarGreaterEqualStringGroupColumn.class, 
CharScalarGreaterEqualStringGroupColumn.class, + FilterStringGroupColGreaterEqualStringScalar.class, FilterStringScalarGreaterEqualStringGroupColumn.class, + FilterStringGroupColGreaterEqualVarCharScalar.class, FilterVarCharScalarGreaterEqualStringGroupColumn.class, + FilterStringGroupColGreaterEqualCharScalar.class, FilterCharScalarGreaterEqualStringGroupColumn.class, FilterLongColGreaterEqualLongColumn.class, FilterLongColGreaterEqualDoubleColumn.class, FilterDoubleColGreaterEqualLongColumn.class, FilterDoubleColGreaterEqualDoubleColumn.class, FilterLongColGreaterEqualLongScalar.class, FilterLongColGreaterEqualDoubleScalar.class, diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java index b6d4d56..e1add92 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPEqualOrLessThan.java @@ -35,9 +35,14 @@ DoubleColLessEqualLongScalar.class, DoubleColLessEqualDoubleScalar.class, LongScalarLessEqualLongColumn.class, LongScalarLessEqualDoubleColumn.class, DoubleScalarLessEqualLongColumn.class, DoubleScalarLessEqualDoubleColumn.class, - StringColLessEqualStringColumn.class, StringColLessEqualStringScalar.class, - StringScalarLessEqualStringColumn.class, FilterStringColLessEqualStringColumn.class, - FilterStringColLessEqualStringScalar.class, FilterStringScalarLessEqualStringColumn.class, + StringGroupColLessEqualStringGroupColumn.class, FilterStringGroupColLessEqualStringGroupColumn.class, + StringGroupColLessEqualStringScalar.class, + StringGroupColLessEqualVarCharScalar.class, StringGroupColLessEqualCharScalar.class, + StringScalarLessEqualStringGroupColumn.class, + VarCharScalarLessEqualStringGroupColumn.class, CharScalarLessEqualStringGroupColumn.class, + FilterStringGroupColLessEqualStringScalar.class, 
FilterStringScalarLessEqualStringGroupColumn.class, + FilterStringGroupColLessEqualVarCharScalar.class, FilterVarCharScalarLessEqualStringGroupColumn.class, + FilterStringGroupColLessEqualCharScalar.class, FilterCharScalarLessEqualStringGroupColumn.class, FilterLongColLessEqualLongColumn.class, FilterLongColLessEqualDoubleColumn.class, FilterDoubleColLessEqualLongColumn.class, FilterDoubleColLessEqualDoubleColumn.class, FilterLongColLessEqualLongScalar.class, FilterLongColLessEqualDoubleScalar.class, diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java index 3ef7b44..ff8cfe7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPGreaterThan.java @@ -35,9 +35,14 @@ DoubleColGreaterLongScalar.class, DoubleColGreaterDoubleScalar.class, LongScalarGreaterLongColumn.class, LongScalarGreaterDoubleColumn.class, DoubleScalarGreaterLongColumn.class, DoubleScalarGreaterDoubleColumn.class, - StringColGreaterStringColumn.class, StringColGreaterStringScalar.class, - StringScalarGreaterStringColumn.class, FilterStringColGreaterStringColumn.class, - FilterStringColGreaterStringScalar.class, FilterStringScalarGreaterStringColumn.class, + StringGroupColGreaterStringGroupColumn.class, FilterStringGroupColGreaterStringGroupColumn.class, + StringGroupColGreaterStringScalar.class, + StringGroupColGreaterVarCharScalar.class, StringGroupColGreaterCharScalar.class, + StringScalarGreaterStringGroupColumn.class, + VarCharScalarGreaterStringGroupColumn.class, CharScalarGreaterStringGroupColumn.class, + FilterStringGroupColGreaterStringScalar.class, FilterStringScalarGreaterStringGroupColumn.class, + FilterStringGroupColGreaterVarCharScalar.class, FilterVarCharScalarGreaterStringGroupColumn.class, + FilterStringGroupColGreaterCharScalar.class, 
FilterCharScalarGreaterStringGroupColumn.class, FilterLongColGreaterLongColumn.class, FilterLongColGreaterDoubleColumn.class, FilterDoubleColGreaterLongColumn.class, FilterDoubleColGreaterDoubleColumn.class, FilterLongColGreaterLongScalar.class, FilterLongColGreaterDoubleScalar.class, diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java index 27c983e..905dcc5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPLessThan.java @@ -35,9 +35,14 @@ DoubleColLessLongScalar.class, DoubleColLessDoubleScalar.class, LongScalarLessLongColumn.class, LongScalarLessDoubleColumn.class, DoubleScalarLessLongColumn.class, DoubleScalarLessDoubleColumn.class, - StringColLessStringColumn.class, StringColLessStringScalar.class, - StringScalarLessStringColumn.class, FilterStringColLessStringColumn.class, - FilterStringColLessStringScalar.class, FilterStringScalarLessStringColumn.class, + StringGroupColLessStringGroupColumn.class, FilterStringGroupColLessStringGroupColumn.class, + StringGroupColLessStringScalar.class, + StringGroupColLessVarCharScalar.class, StringGroupColLessCharScalar.class, + StringScalarLessStringGroupColumn.class, + VarCharScalarLessStringGroupColumn.class, CharScalarLessStringGroupColumn.class, + FilterStringGroupColLessStringScalar.class, FilterStringScalarLessStringGroupColumn.class, + FilterStringGroupColLessVarCharScalar.class, FilterVarCharScalarLessStringGroupColumn.class, + FilterStringGroupColLessCharScalar.class, FilterCharScalarLessStringGroupColumn.class, FilterLongColLessLongColumn.class, FilterLongColLessDoubleColumn.class, FilterDoubleColLessLongColumn.class, FilterDoubleColLessDoubleColumn.class, FilterLongColLessLongScalar.class, FilterLongColLessDoubleScalar.class, diff --git 
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqual.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqual.java index d604cd5..ab2c159 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqual.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOPNotEqual.java @@ -34,9 +34,14 @@ DoubleColNotEqualLongScalar.class, DoubleColNotEqualDoubleScalar.class, LongScalarNotEqualLongColumn.class, LongScalarNotEqualDoubleColumn.class, DoubleScalarNotEqualLongColumn.class, DoubleScalarNotEqualDoubleColumn.class, - StringColNotEqualStringColumn.class, StringColNotEqualStringScalar.class, - StringScalarNotEqualStringColumn.class, FilterStringColNotEqualStringColumn.class, - FilterStringColNotEqualStringScalar.class, FilterStringScalarNotEqualStringColumn.class, + StringGroupColNotEqualStringGroupColumn.class, FilterStringGroupColNotEqualStringGroupColumn.class, + StringGroupColNotEqualStringScalar.class, + StringGroupColNotEqualVarCharScalar.class, StringGroupColNotEqualCharScalar.class, + StringScalarNotEqualStringGroupColumn.class, + VarCharScalarNotEqualStringGroupColumn.class, CharScalarNotEqualStringGroupColumn.class, + FilterStringGroupColNotEqualStringScalar.class, FilterStringScalarNotEqualStringGroupColumn.class, + FilterStringGroupColNotEqualVarCharScalar.class, FilterVarCharScalarNotEqualStringGroupColumn.class, + FilterStringGroupColNotEqualCharScalar.class, FilterCharScalarNotEqualStringGroupColumn.class, FilterLongColNotEqualLongColumn.class, FilterLongColNotEqualDoubleColumn.class, FilterDoubleColNotEqualLongColumn.class, FilterDoubleColNotEqualDoubleColumn.class, FilterLongColNotEqualLongScalar.class, FilterLongColNotEqualDoubleScalar.class, diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java index 0612647..0c865dc 100644 --- 
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizationContext.java @@ -29,7 +29,9 @@ import junit.framework.Assert; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.vector.expressions.ColAndCol; import org.apache.hadoop.hive.ql.exec.vector.expressions.ColOrCol; import org.apache.hadoop.hive.ql.exec.vector.expressions.DoubleColumnInList; @@ -38,10 +40,14 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseDoubleToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncLogWithBaseLongToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.FuncPowerDoubleToDouble; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprCharScalarStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnVarCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprVarCharScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IsNotNull; import 
org.apache.hadoop.hive.ql.exec.vector.expressions.IsNull; import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColumnInList; @@ -80,10 +86,14 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnBetween; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongColumnNotBetween; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterLongScalarGreaterLongColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColGreaterStringColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColGreaterStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterStringScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColumnBetween; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColumnNotBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharColumnNotBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharColumnBetween; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharColumnNotBetween; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncLnDoubleToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncRoundDoubleToDouble; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FuncSinDoubleToDouble; @@ -128,6 +138,8 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUnixTimeStamp; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import 
org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.junit.Test; @@ -289,11 +301,12 @@ public void testStringFilterExpressions() throws HiveException { VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); - assertTrue(ve instanceof FilterStringColGreaterStringScalar); + assertTrue(ve instanceof FilterStringGroupColGreaterStringScalar); } @Test public void testFilterStringColCompareStringColumnExpressions() throws HiveException { + // Strings test ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(String.class, "col1", "table", false); ExprNodeColumnDesc col2Expr = new ExprNodeColumnDesc(String.class, "col2", "table", false); @@ -313,7 +326,97 @@ public void testFilterStringColCompareStringColumnExpressions() throws HiveExcep VectorExpression ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); - assertTrue(ve instanceof FilterStringColGreaterStringColumn); + assertTrue(ve instanceof FilterStringGroupColGreaterStringGroupColumn); + + // 2 CHAR test + CharTypeInfo charTypeInfo = new CharTypeInfo(10); + col1Expr = new ExprNodeColumnDesc(charTypeInfo, "col1", "table", false); + col2Expr = new ExprNodeColumnDesc(charTypeInfo, "col2", "table", false); + + udf = new GenericUDFOPGreaterThan(); + exprDesc = new ExprNodeGenericFuncDesc(); + exprDesc.setGenericUDF(udf); + children1 = new ArrayList(2); + children1.add(col1Expr); + children1.add(col2Expr); + exprDesc.setChildren(children1); + + vc = new VectorizationContext(columnMap, 2); + + ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); + + assertTrue(ve instanceof FilterStringGroupColGreaterStringGroupColumn); + + // 2 VARCHAR test + VarcharTypeInfo varcharTypeInfo = new VarcharTypeInfo(10); + col1Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col1", "table", false); + col2Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col2", "table", false); + + 
udf = new GenericUDFOPGreaterThan(); + exprDesc = new ExprNodeGenericFuncDesc(); + exprDesc.setGenericUDF(udf); + children1 = new ArrayList(2); + children1.add(col1Expr); + children1.add(col2Expr); + exprDesc.setChildren(children1); + + vc = new VectorizationContext(columnMap, 2); + + ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); + + assertTrue(ve instanceof FilterStringGroupColGreaterStringGroupColumn); + + // Some mix tests (STRING, CHAR), (VARCHAR, CHAR), (VARCHAR, STRING)... + col1Expr = new ExprNodeColumnDesc(String.class, "col1", "table", false); + col2Expr = new ExprNodeColumnDesc(charTypeInfo, "col2", "table", false); + + udf = new GenericUDFOPGreaterThan(); + exprDesc = new ExprNodeGenericFuncDesc(); + exprDesc.setGenericUDF(udf); + children1 = new ArrayList(2); + children1.add(col1Expr); + children1.add(col2Expr); + exprDesc.setChildren(children1); + + vc = new VectorizationContext(columnMap, 2); + + ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); + + assertTrue(ve instanceof FilterStringGroupColGreaterStringGroupColumn); + + col1Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col1", "table", false); + col2Expr = new ExprNodeColumnDesc(charTypeInfo, "col2", "table", false); + + udf = new GenericUDFOPGreaterThan(); + exprDesc = new ExprNodeGenericFuncDesc(); + exprDesc.setGenericUDF(udf); + children1 = new ArrayList(2); + children1.add(col1Expr); + children1.add(col2Expr); + exprDesc.setChildren(children1); + + vc = new VectorizationContext(columnMap, 2); + + ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); + + assertTrue(ve instanceof FilterStringGroupColGreaterStringGroupColumn); + + col1Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col1", "table", false); + col2Expr = new ExprNodeColumnDesc(String.class, "col2", "table", false); + + udf = new GenericUDFOPGreaterThan(); + exprDesc = new ExprNodeGenericFuncDesc(); + exprDesc.setGenericUDF(udf); + children1 = 
new ArrayList(2); + children1.add(col1Expr); + children1.add(col2Expr); + exprDesc.setChildren(children1); + + vc = new VectorizationContext(columnMap, 2); + + ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); + + assertTrue(ve instanceof FilterStringGroupColGreaterStringGroupColumn); } @Test @@ -908,6 +1011,7 @@ public void testTimeStampUdfs() throws HiveException { @Test public void testBetweenFilters() throws HiveException { + // string tests ExprNodeColumnDesc col1Expr = new ExprNodeColumnDesc(String.class, "col1", "table", false); ExprNodeConstantDesc constDesc = new ExprNodeConstantDesc("Alpha"); ExprNodeConstantDesc constDesc2 = new ExprNodeConstantDesc("Bravo"); @@ -934,6 +1038,56 @@ public void testBetweenFilters() throws HiveException { ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); assertTrue(ve instanceof FilterStringColumnNotBetween); + // CHAR tests + CharTypeInfo charTypeInfo = new CharTypeInfo(10); + col1Expr = new ExprNodeColumnDesc(charTypeInfo, "col1", "table", false); + constDesc = new ExprNodeConstantDesc(charTypeInfo, new HiveChar("Alpha", 10)); + constDesc2 = new ExprNodeConstantDesc(charTypeInfo, new HiveChar("Bravo", 10)); + + // CHAR BETWEEN + udf = new GenericUDFBetween(); + children1 = new ArrayList(); + children1.add(new ExprNodeConstantDesc(new Boolean(false))); // no NOT keyword + children1.add(col1Expr); + children1.add(constDesc); + children1.add(constDesc2); + exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, + children1); + + vc = new VectorizationContext(columnMap, 2); + ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); + assertTrue(ve instanceof FilterCharColumnBetween); + + // CHAR NOT BETWEEN + children1.set(0, new ExprNodeConstantDesc(new Boolean(true))); // has NOT keyword + ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); + assertTrue(ve instanceof FilterCharColumnNotBetween); + 
+ // VARCHAR tests + VarcharTypeInfo varcharTypeInfo = new VarcharTypeInfo(10); + col1Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col1", "table", false); + constDesc = new ExprNodeConstantDesc(varcharTypeInfo, new HiveVarchar("Alpha", 10)); + constDesc2 = new ExprNodeConstantDesc(varcharTypeInfo, new HiveVarchar("Bravo", 10)); + + // VARCHAR BETWEEN + udf = new GenericUDFBetween(); + children1 = new ArrayList(); + children1.add(new ExprNodeConstantDesc(new Boolean(false))); // no NOT keyword + children1.add(col1Expr); + children1.add(constDesc); + children1.add(constDesc2); + exprDesc = new ExprNodeGenericFuncDesc(TypeInfoFactory.booleanTypeInfo, udf, + children1); + + vc = new VectorizationContext(columnMap, 2); + ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); + assertTrue(ve instanceof FilterVarCharColumnBetween); + + // VARCHAR NOT BETWEEN + children1.set(0, new ExprNodeConstantDesc(new Boolean(true))); // has NOT keyword + ve = vc.getVectorExpression(exprDesc, VectorExpressionDescriptor.Mode.FILTER); + assertTrue(ve instanceof FilterVarCharColumnNotBetween); + // long BETWEEN children1.set(0, new ExprNodeConstantDesc(new Boolean(false))); children1.set(1, new ExprNodeColumnDesc(Long.class, "col1", "table", false)); @@ -1173,12 +1327,12 @@ public void testIfConditionalExprs() throws HiveException { children1.set(1, col2Expr); children1.set(2, col3Expr); ve = vc.getVectorExpression(exprDesc); - assertTrue(ve instanceof IfExprStringColumnStringColumn); + assertTrue(ve instanceof IfExprStringGroupColumnStringGroupColumn); // column/scalar children1.set(2, constDesc3); ve = vc.getVectorExpression(exprDesc); - assertTrue(ve instanceof IfExprStringColumnStringScalar); + assertTrue(ve instanceof IfExprStringGroupColumnStringScalar); // scalar/scalar children1.set(1, constDesc2); @@ -1188,7 +1342,62 @@ public void testIfConditionalExprs() throws HiveException { // scalar/column children1.set(2, col3Expr); ve = 
vc.getVectorExpression(exprDesc); - assertTrue(ve instanceof IfExprStringScalarStringColumn); - } + assertTrue(ve instanceof IfExprStringScalarStringGroupColumn); + // test for CHAR type + CharTypeInfo charTypeInfo = new CharTypeInfo(10); + constDesc2 = new ExprNodeConstantDesc(charTypeInfo, new HiveChar("Alpha", 10)); + constDesc3 = new ExprNodeConstantDesc(charTypeInfo, new HiveChar("Bravo", 10)); + col2Expr = new ExprNodeColumnDesc(charTypeInfo, "col2", "table", false); + col3Expr = new ExprNodeColumnDesc(charTypeInfo, "col3", "table", false); + + // column/column + children1.set(1, col2Expr); + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprStringGroupColumnStringGroupColumn); + + // column/scalar + children1.set(2, constDesc3); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprStringGroupColumnCharScalar); + + // scalar/scalar + children1.set(1, constDesc2); +// ve = vc.getVectorExpression(exprDesc); +// assertTrue(ve instanceof IfExprCharScalarCharScalar); + + // scalar/column + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprCharScalarStringGroupColumn); + + // test for VARCHAR type + VarcharTypeInfo varcharTypeInfo = new VarcharTypeInfo(10); + constDesc2 = new ExprNodeConstantDesc(varcharTypeInfo, new HiveVarchar("Alpha", 10)); + constDesc3 = new ExprNodeConstantDesc(varcharTypeInfo, new HiveVarchar("Bravo", 10)); + col2Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col2", "table", false); + col3Expr = new ExprNodeColumnDesc(varcharTypeInfo, "col3", "table", false); + + // column/column + children1.set(1, col2Expr); + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprStringGroupColumnStringGroupColumn); + + // column/scalar + children1.set(2, constDesc3); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprStringGroupColumnVarCharScalar); + + // 
scalar/scalar + children1.set(1, constDesc2); +// ve = vc.getVectorExpression(exprDesc); +// assertTrue(ve instanceof IfExprVarCharScalarVarCharScalar); + + // scalar/column + children1.set(2, col3Expr); + ve = vc.getVectorExpression(exprDesc); + assertTrue(ve instanceof IfExprVarCharScalarStringGroupColumn); + } } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java index fccac66..a711b55 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorConditionalExpressions.java @@ -32,10 +32,10 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleScalar; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleScalarDoubleColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.IfExprDoubleColumnDoubleScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringColumnStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringGroupColumnStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringGroupColumn; import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprStringScalarStringColumn; import org.junit.Test; @@ -388,7 +388,7 @@ public void testDoubleColumnScalarIfExpr() { @Test public void testIfExprStringColumnStringColumn() { VectorizedRowBatch batch = getBatch1Long3BytesVectors(); - VectorExpression expr = new IfExprStringColumnStringColumn(0, 1, 2, 3); + 
VectorExpression expr = new IfExprStringGroupColumnStringGroupColumn(0, 1, 2, 3); BytesColumnVector r = (BytesColumnVector) batch.cols[3]; expr.evaluate(batch); assertTrue(getString(r, 0).equals("arg3_0")); @@ -474,7 +474,7 @@ public void testIfExprStringColumnStringColumn() { public void testIfExprStringColumnStringScalar() { VectorizedRowBatch batch = getBatch1Long3BytesVectors(); byte[] scalar = getUTF8Bytes("scalar"); - VectorExpression expr = new IfExprStringColumnStringScalar(0, 1, scalar, 3); + VectorExpression expr = new IfExprStringGroupColumnStringScalar(0, 1, scalar, 3); BytesColumnVector r = (BytesColumnVector) batch.cols[3]; expr.evaluate(batch); assertTrue(getString(r, 0).equals("scalar")); @@ -498,7 +498,7 @@ public void testIfExprStringColumnStringScalar() { public void testIfExprStringScalarStringColumn() { VectorizedRowBatch batch = getBatch1Long3BytesVectors(); byte[] scalar = getUTF8Bytes("scalar"); - VectorExpression expr = new IfExprStringScalarStringColumn(0,scalar, 2, 3); + VectorExpression expr = new IfExprStringScalarStringGroupColumn(0,scalar, 2, 3); BytesColumnVector r = (BytesColumnVector) batch.cols[3]; expr.evaluate(batch); assertTrue(getString(r, 0).equals("arg3_0")); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java index 849c9e8..74f4671 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorGenericDateExpressions.java @@ -27,7 +27,6 @@ import org.junit.Assert; import org.junit.Test; - import java.io.UnsupportedEncodingException; import java.sql.Date; import java.text.SimpleDateFormat; @@ -136,9 +135,12 @@ private ColumnVector castTo(LongColumnVector date, VectorExpression.Type type) { return toTimestamp(date); case STRING: + case CHAR: + case 
VARCHAR: return toString(date); + default: + throw new Error("Unsupported input type " + type.name()); } - return null; } private void testDateAddColScalar(VectorExpression.Type colType1, boolean isPositive) { @@ -214,8 +216,12 @@ private void validateDateAdd(VectorizedRowBatch batch, long scalar1, LongColumnV udf = new VectorUDFDateAddScalarCol(toTimestamp(scalar1), 0, 1); break; case STRING: + case CHAR: + case VARCHAR: udf = new VectorUDFDateAddScalarCol(toString(scalar1), 0, 1); break; + default: + throw new Error("Invalid input type: " + colType1.name()); } } else { switch (colType1) { @@ -226,8 +232,12 @@ private void validateDateAdd(VectorizedRowBatch batch, long scalar1, LongColumnV udf = new VectorUDFDateSubScalarCol(toTimestamp(scalar1), 0, 1); break; case STRING: + case CHAR: + case VARCHAR: udf = new VectorUDFDateSubScalarCol(toString(scalar1), 0, 1); break; + default: + throw new Error("Invalid input type: " + colType1.name()); } } udf.setInputTypes(colType1, VectorExpression.Type.OTHER); @@ -694,7 +704,9 @@ public void testDate() { private void validateToDate(VectorizedRowBatch batch, VectorExpression.Type colType, LongColumnVector date) { VectorExpression udf; - if (colType == VectorExpression.Type.STRING) { + if (colType == VectorExpression.Type.STRING || + colType == VectorExpression.Type.CHAR || + colType == VectorExpression.Type.VARCHAR) { udf = new CastStringToDate(0, 1); } else { udf = new CastLongToDate(0, 1); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java index dd9ab1c..a51837e 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorStringExpressions.java @@ -23,19 +23,38 @@ import junit.framework.Assert; +import org.apache.commons.codec.binary.Hex; +import 
org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColEqualStringScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColGreaterEqualStringScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColLessStringColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringColLessStringScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarEqualStringColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarGreaterStringColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarLessEqualStringColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringColEqualStringScalar; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringColLessStringColumn; -import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringScalarEqualStringColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CharScalarEqualStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarEqualStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarGreaterStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterCharScalarLessEqualStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColEqualVarCharScalar; +import 
org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColGreaterEqualVarCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringGroupColLessVarCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarEqualStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarGreaterStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterStringScalarLessEqualStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarEqualStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarGreaterStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.FilterVarCharScalarLessEqualStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualStringScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColEqualVarCharScalar; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringGroupColLessStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.StringScalarEqualStringGroupColumn; +import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.VarCharScalarEqualStringGroupColumn; import org.apache.hadoop.hive.ql.metadata.HiveException; 
import org.apache.hadoop.io.Text; import org.junit.Test; @@ -63,6 +82,8 @@ private static byte[] blanksRight; private static byte[] blanksBoth; private static byte[] blankString; + private static byte[] blankRanges; + private static byte[] ascii_sentence; static { try { @@ -84,6 +105,11 @@ blanksRight = "foo ".getBytes("UTF-8"); blanksBoth = " foo ".getBytes("UTF-8"); blankString = " ".getBytes("UTF-8"); + blankRanges = " more than a bargain ".getBytes("UTF-8"); + //012345678901234567890123456789 + ascii_sentence = "The fox trotted over the fence.".getBytes("UTF-8"); + //0123456789012345678901234567890 + } catch (UnsupportedEncodingException e) { e.printStackTrace(); } @@ -107,6 +133,3032 @@ static void addMultiByteChars(byte[] b) { b[i++] = (byte) 0xA2; } + //------------------------------------------------------------- + + // total characters = 2; byte length = 3 + static void addMultiByteCharLeftPadded1_1(byte[] b) { + int i = 0; + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0xD0; // Cyrillic Capital DJE U+402 (2 bytes) + b[i++] = (byte) 0x82; + } + + // total characters = 3; byte length = 9 + static void addMultiByteCharLeftPadded1_2(byte[] b) { + int i = 0; + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0xF0; // Smiling Face with Open Mouth and Smiling Eyes U+1F604 (4 bytes) + b[i++] = (byte) 0x9F; + b[i++] = (byte) 0x98; + b[i++] = (byte) 0x84; + b[i++] = (byte) 0xF0; // Grimacing Face U+1F62C (4 bytes) + b[i++] = (byte) 0x9F; + b[i++] = (byte) 0x98; + b[i++] = (byte) 0xAC; + } + + // total characters = 4; byte length = 6 + static void addMultiByteCharLeftPadded3_1(byte[] b) { + int i = 0; + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0xE4; // Asian character U+4824 (3 bytes) + b[i++] = (byte) 0xA0; + b[i++] = (byte) 0xA4; + } + + //------------------------------------------------------------- + + // total characters 
= 2; byte length = 4 + static void addMultiByteCharRightPadded1_1(byte[] b) { + int i = 0; + b[i++] = (byte) 0xE0; // Tamil Om U+0BD0 (3 bytes) + b[i++] = (byte) 0xAF; + b[i++] = (byte) 0x90; + b[i++] = (byte) 0x20; // blank " " (1 byte) + } + + // total characters = 3; byte length = 5 + static void addMultiByteCharRightPadded1_2(byte[] b) { + int i = 0; + b[i++] = (byte) 0xEA; // Va Syllable MEE U+A521 (3 bytes) + b[i++] = (byte) 0x94; + b[i++] = (byte) 0xA1; + b[i++] = (byte) 0x5A; // Latin Capital Letter Z U+005A (1 byte) + b[i++] = (byte) 0x20; // blank " " (1 byte) + } + + // total characters = 4; byte length = 9 + static void addMultiByteCharRightPadded1_3(byte[] b) { + int i = 0; + b[i++] = (byte) 0xCC; // COMBINING ACUTE ACCENT U+0301 (2 bytes) + b[i++] = (byte) 0x81; + b[i++] = (byte) 0xE0; // DEVANAGARI LETTER KA U+0915 (3 bytes) + b[i++] = (byte) 0xA4; + b[i++] = (byte) 0x95; + b[i++] = (byte) 0xE0; // DEVANAGARI SIGN VIRAMA U+094D (3 bytes) + b[i++] = (byte) 0xA5; + b[i++] = (byte) 0x8D; + b[i++] = (byte) 0x20; // blank " " (1 byte) + } + + // total characters = 10; byte length = 26 + static int addMultiByteCharSentenceOne(byte[] b, int start) { + int i = start; + b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER THA U+1992 (3 bytes) + b[i++] = (byte) 0xA6; + b[i++] = (byte) 0x92; + b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW XA U+1986 (3 bytes) + b[i++] = (byte) 0xA6; + b[i++] = (byte) 0x86; + b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER HIGH MA U+1996 (3 bytes) + b[i++] = (byte) 0xA6; + b[i++] = (byte) 0x96; + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW QA U+1981 (3 bytes) + b[i++] = (byte) 0xA6; + b[i++] = (byte) 0x81; + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW BA U+19A5 (3 bytes) + b[i++] = (byte) 0xA6; + b[i++] = (byte) 0xA5; + b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER HIGH LA U+199C (3 bytes) + b[i++] = (byte) 0xA6; + b[i++] = (byte) 0x9C; + 
b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW KVA U+19A8 (3 bytes) + b[i++] = (byte) 0xA6; + b[i++] = (byte) 0xA8; + b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW FA U+199D (3 bytes) + b[i++] = (byte) 0xA6; + b[i++] = (byte) 0x9D; + return i; + } + + // total characters = 13; byte length = 24 + static int addMultiByteCharSentenceTwo(byte[] b, int start) { + int i = start; + b[i++] = (byte) 0xC9; // LATIN SMALL LETTER TURNED A U+0250 (2 bytes) + b[i++] = (byte) 0x90; + b[i++] = (byte) 0xC9; // LATIN SMALL LETTER GAMMA U+0263 (2 bytes) + b[i++] = (byte) 0xA3; + b[i++] = (byte) 0xC9; // LATIN SMALL LETTER TURNED M U+026F (2 bytes) + b[i++] = (byte) 0xAF; + b[i++] = (byte) 0xCA; // LATIN SMALL LETTER S WITH HOOK U+0282 (2 bytes) + b[i++] = (byte) 0x82; + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0xCA; // LATIN LETTER SMALL CAPITAL L U+029F (2 bytes) + b[i++] = (byte) 0x9F; + b[i++] = (byte) 0xCB; // MODIFIER LETTER TRIANGULAR COLON U+02D0 (2 bytes) + b[i++] = (byte) 0x90; + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0xCB; // RING ABOVE U+02DA (2 bytes) + b[i++] = (byte) 0x9A; + b[i++] = (byte) 0xCB; // MODIFIER LETTER SMALL L U+02E1 (2 bytes) + b[i++] = (byte) 0xA1; + b[i++] = (byte) 0xCB; // MODIFIER LETTER SMALL X U+02E3 (2 bytes) + b[i++] = (byte) 0xA3; + b[i++] = (byte) 0xCB; // MODIFIER LETTER UP ARROWHEAD U+02C4 (2 bytes) + b[i++] = (byte) 0x84; + b[i++] = (byte) 0x2E; // FULL STOP "." 
(1 byte) + return i; + } + + // total characters = 17; byte length = 30 + static int addMultiByteCharSentenceBlankRanges(byte[] b, int start) { + int i = start; + b[i++] = (byte) 0xF0; // INSCRIPTIONAL YODH U+10B49 (4 bytes) + b[i++] = (byte) 0x90; + b[i++] = (byte) 0xAD; + b[i++] = (byte) 0x89; + b[i++] = (byte) 0xE1; // NEW TAI LUE LETTER LOW FA U+199D (3 bytes) + b[i++] = (byte) 0xA6; + b[i++] = (byte) 0x9D; + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0x2D; // hyphen-minus "-" U+002D (1 byte) + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0x60; // grave accent "`" U+0060 (1 byte) + b[i++] = (byte) 0xE2; // BLACK SUN WITH RAYS U+2600 (3 bytes) + b[i++] = (byte) 0x98; + b[i++] = (byte) 0x80; + b[i++] = (byte) 0xE2; // BALLOT BOX WITH X U+2612 (3 bytes) + b[i++] = (byte) 0x98; + b[i++] = (byte) 0x92; + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0x20; // blank " " (1 byte) + b[i++] = (byte) 0xE2; // WHITE STAR U+2606 (3 bytes) + b[i++] = (byte) 0x98; + b[i++] = (byte) 0x86; + b[i++] = (byte) 0xE2; // WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE U+26FF (3 bytes) + b[i++] = (byte) 0x9B; + b[i++] = (byte) 0xBF; + return i; + } + + + static int addPads(byte[] b, int start, int count) { + int i = start; + int end = start + count; + for ( ; i < end; i++) { + b[i] = (byte) 0x20; // blank " " (1 byte) + } + return i; + } + + private boolean vectorEqual(BytesColumnVector vector, int i, byte[] bytes, int offset, int length) { + byte[] bytesSlice = new byte[length]; + System.arraycopy(bytes, offset, bytesSlice, 0, length); + int vectorLength = vector.length[i]; + byte[] vectorSlice = new byte[vectorLength]; + System.arraycopy(vector.vector[i], vector.start[i], vectorSlice, 0, vectorLength); + 
boolean equals = Arrays.equals(bytesSlice, vectorSlice); + if (!equals) { + System.out.println("vectorEqual offset " + offset + " length " + length + " vectorSlice.length " + vectorSlice.length); + System.out.println("vectorEqual bytesSlice " + Hex.encodeHexString(bytesSlice)); + System.out.println("vectorEqual vectorSlice " + Hex.encodeHexString(vectorSlice)); + } + return equals; + } + + private int vectorCharacterCount(BytesColumnVector vector, int i) { + return StringExpr.characterCount(vector.vector[i], vector.start[i], vector.length[i]); + } + + @Test + // Test basic assign to vector. + public void testAssignBytesColumnVector() { + BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + outV.initBuffer(35); // initialize with estimated element size 35 + + int i = 0; + + int expectedResultLen; + + Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + StringExpr.assign(outV, i, blue, 0, blue.length); + expectedResultLen = blue.length; + Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + StringExpr.assign(outV, i, redgreen, 0, redgreen.length); + expectedResultLen = redgreen.length; + Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + StringExpr.assign(outV, i, ascii_sentence, 0, ascii_sentence.length); + expectedResultLen = ascii_sentence.length; + Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + StringExpr.assign(outV, i, blanksLeft, 0, blanksLeft.length); + expectedResultLen = blanksLeft.length; + Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen)); + i++; + + // Multi-byte characters with blank ranges. 
+ byte[] sentenceBlankRanges = new byte[100]; + int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17); + StringExpr.assign(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen); + expectedResultLen = sentenceBlankRangesLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16); + StringExpr.assign(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3); + expectedResultLen = sentenceBlankRangesLen - 3; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen)); + i++; + + // Some non-zero offsets. + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 4, sentenceBlankRangesLen - 4) == 16); + StringExpr.assign(outV, i, sentenceBlankRanges, 4, sentenceBlankRangesLen - 4); + expectedResultLen = sentenceBlankRangesLen - 4; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 4, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 16); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13); + StringExpr.assign(outV, i, sentenceBlankRanges, 7, 17); + expectedResultLen = 17; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 13); + i++; + } + + @Test + // Test basic right trim of bytes slice. + public void testRightTrimBytesSlice() { + int resultLen; + // Nothing to trim (ASCII). 
+ Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + resultLen = StringExpr.rightTrim(blue, 0, blue.length); + Assert.assertTrue(resultLen == blue.length); + Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4); + + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + resultLen = StringExpr.rightTrim(redgreen, 0, redgreen.length); + Assert.assertTrue(resultLen == redgreen.length); + + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + resultLen = StringExpr.rightTrim(ascii_sentence, 0, ascii_sentence.length); + Assert.assertTrue(resultLen == ascii_sentence.length); + + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + resultLen = StringExpr.rightTrim(blanksLeft, 0, blanksLeft.length); + Assert.assertTrue(resultLen == blanksLeft.length); + + // Simple trims. + Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5); + resultLen = StringExpr.rightTrim(blanksRight, 0, blanksRight.length); + Assert.assertTrue(resultLen == 3); + Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 3); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7); + resultLen = StringExpr.rightTrim(blanksBoth, 0, blanksBoth.length); + Assert.assertTrue(resultLen == 5); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 5); + + Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2); + resultLen = StringExpr.rightTrim(blankString, 0, blankString.length); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 0); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30); + resultLen = StringExpr.rightTrim(blankRanges, 0, blankRanges.length); + Assert.assertTrue(resultLen == blankRanges.length - 4); + 
Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 26); + + // Offset trims. + Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4); + resultLen = StringExpr.rightTrim(blanksRight, 1, blanksRight.length - 1); + Assert.assertTrue(resultLen == 2); + Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 2); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3); + resultLen = StringExpr.rightTrim(blanksBoth, 4, blanksBoth.length - 4); + Assert.assertTrue(resultLen == 1); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 1); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2); + resultLen = StringExpr.rightTrim(blanksBoth, 5, blanksBoth.length -5 ); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 0); + + Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1); + resultLen = StringExpr.rightTrim(blankString, 1, blankString.length - 1); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blankString, 1, resultLen) == 0); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26); + resultLen = StringExpr.rightTrim(blankRanges, 4, blankRanges.length - 4); + Assert.assertTrue(resultLen == blankRanges.length - 4 -4); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, resultLen) == 22); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24); + resultLen = StringExpr.rightTrim(blankRanges, 6, blankRanges.length- 6); + Assert.assertTrue(resultLen == blankRanges.length - 6 - 4); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, resultLen) == 20); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23); + resultLen = 
StringExpr.rightTrim(blankRanges, 7, blankRanges.length - 7); + Assert.assertTrue(resultLen == blankRanges.length - 7 - 4); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 19); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1); + resultLen = StringExpr.rightTrim(blankRanges, 7, 8 - 7); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 0); + + // Multi-byte trims. + byte[] multiByte = new byte[100]; + + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2); + resultLen = StringExpr.rightTrim(multiByte, 0, 4); + Assert.assertTrue(resultLen == 3); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 1); + + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3); + resultLen = StringExpr.rightTrim(multiByte, 0, 5); + Assert.assertTrue(resultLen == 4); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2); + + addMultiByteCharRightPadded1_3(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4); + resultLen = StringExpr.rightTrim(multiByte, 0, 9); + Assert.assertTrue(resultLen == 8); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 3); + + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1); + resultLen = StringExpr.rightTrim(multiByte, 3, 1); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 0); + + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2); + resultLen = StringExpr.rightTrim(multiByte, 3, 2); + Assert.assertTrue(resultLen == 1); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, resultLen) == 1); + + byte[] sentenceOne = new byte[100]; + int 
sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10); + resultLen = StringExpr.rightTrim(sentenceOne, 0, sentenceOneLen); + Assert.assertTrue(resultLen == sentenceOneLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9); + resultLen = StringExpr.rightTrim(sentenceOne, 0, sentenceOneLen - 3); + Assert.assertTrue(resultLen == sentenceOneLen - 3); + + byte[] sentenceTwo = new byte[100]; + int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13); + resultLen = StringExpr.rightTrim(sentenceTwo, 0, sentenceTwoLen); + Assert.assertTrue(resultLen == sentenceTwoLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10); + resultLen = StringExpr.rightTrim(sentenceTwo, 0, sentenceTwoLen - 5); + Assert.assertTrue(resultLen == sentenceTwoLen - 5); + + int start; + + // Left pad longer strings with multi-byte characters. 
+ byte[] sentenceOnePaddedLeft = new byte[100]; + start = addPads(sentenceOnePaddedLeft, 0, 3); + int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10); + resultLen = StringExpr.rightTrim(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen); + Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9); + resultLen = StringExpr.rightTrim(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3); + Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen - 3); + + byte[] sentenceTwoPaddedLeft = new byte[100]; + start = addPads(sentenceTwoPaddedLeft, 0, 2); + int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13); + resultLen = StringExpr.rightTrim(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen); + Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10); + resultLen = StringExpr.rightTrim(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5); + Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen - 5); + + // Right pad longer strings with multi-byte characters. 
+ byte[] sentenceOnePaddedRight = new byte[100]; + start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0); + int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4); + resultLen = StringExpr.rightTrim(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen); + Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 4); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9); + resultLen = StringExpr.rightTrim(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4); + Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 3 - 4); + + byte[] sentenceTwoPaddedRight = new byte[100]; + start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0); + int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1); + resultLen = StringExpr.rightTrim(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen); + Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 1); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10); + resultLen = StringExpr.rightTrim(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1); + Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 5 - 1); + + // Multi-byte characters with blank ranges. 
+ byte[] sentenceBlankRanges = new byte[100]; + int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17); + resultLen = StringExpr.rightTrim(sentenceBlankRanges, 0, sentenceBlankRangesLen); + Assert.assertTrue(resultLen == sentenceBlankRangesLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16); + resultLen = StringExpr.rightTrim(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3); + Assert.assertTrue(resultLen == sentenceBlankRangesLen - 3); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13); + resultLen = StringExpr.rightTrim(sentenceBlankRanges, 7, 17); + Assert.assertTrue(resultLen == 12); + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, resultLen) == 8); + } + + @Test + // Test basic right trim to vector. + public void testRightTrimBytesColumnVector() { + BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + outV.initBuffer(30); // initialize with estimated element size 30 + + int i = 0; + int expectedResultLen; + + // Nothing to trim (ASCII). 
+ Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + StringExpr.rightTrim(outV, i, blue, 0, blue.length); + expectedResultLen = blue.length; + Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 4); + i++; + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + StringExpr.rightTrim(outV, i, redgreen, 0, redgreen.length); + expectedResultLen = redgreen.length; + Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + StringExpr.rightTrim(outV, i, ascii_sentence, 0, ascii_sentence.length); + expectedResultLen = ascii_sentence.length; + Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + StringExpr.rightTrim(outV, i, blanksLeft, 0, blanksLeft.length); + expectedResultLen = blanksLeft.length; + Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen)); + i++; + + // Simple trims. 
+ Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5); + StringExpr.rightTrim(outV, i, blanksRight, 0, blanksRight.length); + expectedResultLen = 3; + Assert.assertTrue(vectorEqual(outV, i, blanksRight, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 3); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7); + StringExpr.rightTrim(outV, i, blanksBoth, 0, blanksBoth.length); + expectedResultLen = 5; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 5); + i++; + Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2); + StringExpr.rightTrim(outV, i, blankString, 0, blankString.length); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blankString, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30); + StringExpr.rightTrim(outV, i, blankRanges, 0, blankRanges.length); + expectedResultLen = blankRanges.length - 4; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 26); + i++; + + // Offset trims. 
+ Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4); + StringExpr.rightTrim(outV, i, blanksRight, 1, blanksRight.length - 1); + expectedResultLen = 2; + Assert.assertTrue(vectorEqual(outV, i, blanksRight, 1, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 2); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3); + StringExpr.rightTrim(outV, i, blanksBoth, 4, blanksBoth.length - 4); + expectedResultLen = 1; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 4, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2); + StringExpr.rightTrim(outV, i, blanksBoth, 5, blanksBoth.length -5 ); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 5, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1); + StringExpr.rightTrim(outV, i, blankString, 1, blankString.length - 1); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blankString, 1, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26); + StringExpr.rightTrim(outV, i, blankRanges, 4, blankRanges.length - 4); + expectedResultLen = blankRanges.length - 4 -4; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 4, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 22); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24); + StringExpr.rightTrim(outV, i, blankRanges, 6, blankRanges.length- 6); + expectedResultLen = blankRanges.length - 6 - 4; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 6, expectedResultLen)); + 
Assert.assertTrue(vectorCharacterCount(outV, i) == 20); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23); + StringExpr.rightTrim(outV, i, blankRanges, 7, blankRanges.length - 7); + expectedResultLen = blankRanges.length - 7 - 4; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 19); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1); + StringExpr.rightTrim(outV, i, blankRanges, 7, 8 - 7); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + + // Multi-byte trims. + byte[] multiByte = new byte[100]; + + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2); + StringExpr.rightTrim(outV, i, multiByte, 0, 4); + expectedResultLen = 3; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3); + StringExpr.rightTrim(outV, i, multiByte, 0, 5); + expectedResultLen = 4; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 2); + i++; + addMultiByteCharRightPadded1_3(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4); + StringExpr.rightTrim(outV, i, multiByte, 0, 9); + expectedResultLen = 8; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 3); + i++; + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1); + StringExpr.rightTrim(outV, i, multiByte, 3, 1); + expectedResultLen = 0; + 
Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2); + StringExpr.rightTrim(outV, i, multiByte, 3, 2); + expectedResultLen = 1; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + + byte[] sentenceOne = new byte[100]; + int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10); + StringExpr.rightTrim(outV, i, sentenceOne, 0, sentenceOneLen); + expectedResultLen = sentenceOneLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9); + StringExpr.rightTrim(outV, i, sentenceOne, 0, sentenceOneLen - 3); + expectedResultLen = sentenceOneLen - 3; + Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen)); + i++; + + byte[] sentenceTwo = new byte[100]; + int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13); + StringExpr.rightTrim(outV, i, sentenceTwo, 0, sentenceTwoLen); + expectedResultLen = sentenceTwoLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10); + StringExpr.rightTrim(outV, i, sentenceTwo, 0, sentenceTwoLen - 5); + expectedResultLen = sentenceTwoLen - 5; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen)); + i++; + + int start; + + // Left pad longer strings with multi-byte characters. 
+ byte[] sentenceOnePaddedLeft = new byte[100]; + start = addPads(sentenceOnePaddedLeft, 0, 3); + int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10); + StringExpr.rightTrim(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen); + expectedResultLen = sentenceOnePaddedLeftLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9); + StringExpr.rightTrim(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3); + expectedResultLen = sentenceOnePaddedLeftLen - 3; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen)); + i++; + + byte[] sentenceTwoPaddedLeft = new byte[100]; + start = addPads(sentenceTwoPaddedLeft, 0, 2); + int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13); + StringExpr.rightTrim(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen); + expectedResultLen = sentenceTwoPaddedLeftLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10); + StringExpr.rightTrim(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5); + expectedResultLen = sentenceTwoPaddedLeftLen - 5; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen)); + i++; + + // Right pad longer strings with multi-byte characters. 
+ byte[] sentenceOnePaddedRight = new byte[100]; + start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0); + int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4); + StringExpr.rightTrim(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen); + expectedResultLen = sentenceOnePaddedRightLen - 4; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9); + StringExpr.rightTrim(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4); + expectedResultLen = sentenceOnePaddedRightLen - 3 - 4; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen)); + i++; + + byte[] sentenceTwoPaddedRight = new byte[100]; + start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0); + int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1); + StringExpr.rightTrim(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen); + expectedResultLen = sentenceTwoPaddedRightLen - 1; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10); + StringExpr.rightTrim(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1); + expectedResultLen = sentenceTwoPaddedRightLen - 5 - 1; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen)); + i++; + + // Multi-byte characters with blank ranges. 
+ byte[] sentenceBlankRanges = new byte[100]; + int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17); + StringExpr.rightTrim(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen); + expectedResultLen = sentenceBlankRangesLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16); + StringExpr.rightTrim(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3); + expectedResultLen = sentenceBlankRangesLen - 3; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13); + StringExpr.rightTrim(outV, i, sentenceBlankRanges, 7, 17); + expectedResultLen = 12; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 8); + } + + @Test + // Test basic truncate of bytes slice. + public void testTruncateBytesSlice() { + int largeMaxLength = 100; + int resultLen; + + // No truncate (ASCII) -- maximum length large. 
+ Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + resultLen = StringExpr.truncate(blue, 0, blue.length, largeMaxLength); + Assert.assertTrue(resultLen == blue.length); + Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4); + + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, largeMaxLength); + Assert.assertTrue(resultLen == redgreen.length); + + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, largeMaxLength); + Assert.assertTrue(resultLen == ascii_sentence.length); + + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, largeMaxLength); + Assert.assertTrue(resultLen == blanksLeft.length); + + // No truncate (ASCII) -- same maximum length. + Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + resultLen = StringExpr.truncate(blue, 0, blue.length, 4); + Assert.assertTrue(resultLen == blue.length); + Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4); + + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, 8); + Assert.assertTrue(resultLen == redgreen.length); + + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, 31); + Assert.assertTrue(resultLen == ascii_sentence.length); + + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, 5); + Assert.assertTrue(resultLen == blanksLeft.length); + + // Simple truncation. 
+ Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + resultLen = StringExpr.truncate(blue, 0, blue.length, 3); + Assert.assertTrue(resultLen == 3); + Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 3); + + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + resultLen = StringExpr.truncate(redgreen, 0, redgreen.length, 6); + Assert.assertTrue(resultLen == 6); + + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + resultLen = StringExpr.truncate(ascii_sentence, 0, ascii_sentence.length, 14); + Assert.assertTrue(resultLen == 14); + + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + resultLen = StringExpr.truncate(blanksLeft, 0, blanksLeft.length, 2); + Assert.assertTrue(resultLen == 2); + + Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5); + resultLen = StringExpr.truncate(blanksRight, 0, blanksRight.length, 4); + Assert.assertTrue(resultLen == 4); + Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 4); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7); + resultLen = StringExpr.truncate(blanksBoth, 0, blanksBoth.length, 2); + Assert.assertTrue(resultLen == 2); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 2); + + Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2); + resultLen = StringExpr.truncate(blankString, 0, blankString.length, 1); + Assert.assertTrue(resultLen == 1); + Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 1); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30); + resultLen = StringExpr.truncate(blankRanges, 0, blankRanges.length, 29); + Assert.assertTrue(resultLen == 29); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 29); + + // Offset 
truncation. + Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4); + resultLen = StringExpr.truncate(blanksRight, 1, blanksRight.length - 1, 3); + Assert.assertTrue(resultLen == 3); + Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 3); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3); + resultLen = StringExpr.truncate(blanksBoth, 4, blanksBoth.length - 4, 2); + Assert.assertTrue(resultLen == 2); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 2); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2); + resultLen = StringExpr.truncate(blanksBoth, 5, blanksBoth.length -5, 1); + Assert.assertTrue(resultLen == 1); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 1); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26); + resultLen = StringExpr.truncate(blankRanges, 4, blankRanges.length - 4, 22); + Assert.assertTrue(resultLen == 22); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, resultLen) == 22); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24); + resultLen = StringExpr.truncate(blankRanges, 6, blankRanges.length- 6, 7); + Assert.assertTrue(resultLen == 7); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, resultLen) == 7); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23); + resultLen = StringExpr.truncate(blankRanges, 7, blankRanges.length - 7, 20); + Assert.assertTrue(resultLen == 20); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 20); + + // Multi-byte truncation. 
+ byte[] multiByte = new byte[100]; + + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2); + resultLen = StringExpr.truncate(multiByte, 0, 4, 1); + Assert.assertTrue(resultLen == 3); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 1); + + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3); + resultLen = StringExpr.truncate(multiByte, 0, 5, 2); + Assert.assertTrue(resultLen == 4); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2); + + addMultiByteCharRightPadded1_3(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4); + resultLen = StringExpr.truncate(multiByte, 0, 9, 2); + Assert.assertTrue(resultLen == 5); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2); + + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2); + resultLen = StringExpr.truncate(multiByte, 3, 2, 1); + Assert.assertTrue(resultLen == 1); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, resultLen) == 1); + + byte[] sentenceOne = new byte[100]; + int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10); + resultLen = StringExpr.truncate(sentenceOne, 0, sentenceOneLen, 8); + Assert.assertTrue(resultLen == 20); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9); + resultLen = StringExpr.truncate(sentenceOne, 0, sentenceOneLen - 3, 3); + Assert.assertTrue(resultLen == 9); + + byte[] sentenceTwo = new byte[100]; + int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13); + resultLen = StringExpr.truncate(sentenceTwo, 0, sentenceTwoLen, 9); + Assert.assertTrue(resultLen == 16); + + 
Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10); + resultLen = StringExpr.truncate(sentenceTwo, 0, sentenceTwoLen - 5, 6); + Assert.assertTrue(resultLen == 11); + + int start; + + // Left pad longer strings with multi-byte characters. + byte[] sentenceOnePaddedLeft = new byte[100]; + start = addPads(sentenceOnePaddedLeft, 0, 3); + int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10); + resultLen = StringExpr.truncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, 4); + Assert.assertTrue(resultLen == 6); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9); + resultLen = StringExpr.truncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, 7); + Assert.assertTrue(resultLen == 13); + + byte[] sentenceTwoPaddedLeft = new byte[100]; + start = addPads(sentenceTwoPaddedLeft, 0, 2); + int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13); + resultLen = StringExpr.truncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, 14); + Assert.assertTrue(resultLen == 24); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10); + resultLen = StringExpr.truncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, 9); + Assert.assertTrue(resultLen == 15); + + // Right pad longer strings with multi-byte characters. 
+ byte[] sentenceOnePaddedRight = new byte[100]; + start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0); + int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4); + resultLen = StringExpr.truncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, 1); + Assert.assertTrue(resultLen == 3); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9); + resultLen = StringExpr.truncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, 5); + Assert.assertTrue(resultLen == 13); + + byte[] sentenceTwoPaddedRight = new byte[100]; + start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0); + int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1); + resultLen = StringExpr.truncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, 6); + Assert.assertTrue(resultLen == 11); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10); + resultLen = StringExpr.truncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, 8); + Assert.assertTrue(resultLen == 14); + + // Multi-byte characters with blank ranges. 
+ byte[] sentenceBlankRanges = new byte[100]; + int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17); + resultLen = StringExpr.truncate(sentenceBlankRanges, 0, sentenceBlankRangesLen, 4); + Assert.assertTrue(resultLen == 9); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16); + resultLen = StringExpr.truncate(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, 14); + Assert.assertTrue(resultLen == 23); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13); + resultLen = StringExpr.truncate(sentenceBlankRanges, 7, 17, 11); + Assert.assertTrue(resultLen == 15); + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, resultLen) == 11); + } + + @Test + // Test basic truncate to vector. + public void testTruncateBytesColumnVector() { + BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + outV.initBuffer(35); // initialize with estimated element size 35 + + int i = 0; + int largeMaxLength = 100; + + int expectedResultLen; + + // No truncate (ASCII) -- maximum length large. 
+ Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + StringExpr.truncate(outV, i, blue, 0, blue.length, largeMaxLength); + expectedResultLen = blue.length; + Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 4); + i++; + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + StringExpr.truncate(outV, i, redgreen, 0, redgreen.length, largeMaxLength); + expectedResultLen = redgreen.length; + Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + StringExpr.truncate(outV, i, ascii_sentence, 0, ascii_sentence.length, largeMaxLength); + expectedResultLen = ascii_sentence.length; + Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + StringExpr.truncate(outV, i, blanksLeft, 0, blanksLeft.length, largeMaxLength); + expectedResultLen = blanksLeft.length; + Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen)); + i++; + + // No truncate (ASCII) -- same maximum length. 
+ Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + StringExpr.truncate(outV, i, blue, 0, blue.length, 4); + expectedResultLen = blue.length; + Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 4); + i++; + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + StringExpr.truncate(outV, i, redgreen, 0, redgreen.length, 8); + expectedResultLen = redgreen.length; + Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + StringExpr.truncate(outV, i, ascii_sentence, 0, ascii_sentence.length, 31); + expectedResultLen = ascii_sentence.length; + Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + StringExpr.truncate(outV, i, blanksLeft, 0, blanksLeft.length, 5); + expectedResultLen = blanksLeft.length; + Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen)); + i++; + + // Simple truncation. 
+ Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + StringExpr.truncate(outV, i, blue, 0, blue.length, 3); + expectedResultLen = 3; + Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 3); + i++; + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + StringExpr.truncate(outV, i, redgreen, 0, redgreen.length, 6); + expectedResultLen = 6; + Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + StringExpr.truncate(outV, i, ascii_sentence, 0, ascii_sentence.length, 14); + expectedResultLen = 14; + Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + StringExpr.truncate(outV, i, blanksLeft, 0, blanksLeft.length, 2); + expectedResultLen = 2; + Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5); + StringExpr.truncate(outV, i, blanksRight, 0, blanksRight.length, 4); + expectedResultLen = 4; + Assert.assertTrue(vectorCharacterCount(outV, i) == 4); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7); + StringExpr.truncate(outV, i, blanksBoth, 0, blanksBoth.length, 2); + expectedResultLen = 2; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 2); + i++; + Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2); + StringExpr.truncate(outV, i, blankString, 0, blankString.length, 1); + expectedResultLen = 1; + Assert.assertTrue(vectorEqual(outV, i, blankString, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + 
Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30); + StringExpr.truncate(outV, i, blankRanges, 0, blankRanges.length, 29); + expectedResultLen = 29; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 29); + i++; + + // Offset truncation. + Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4); + StringExpr.truncate(outV, i, blanksRight, 1, blanksRight.length - 1, 3); + expectedResultLen = 3; + Assert.assertTrue(vectorEqual(outV, i, blanksRight, 1, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 3); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3); + StringExpr.truncate(outV, i, blanksBoth, 4, blanksBoth.length - 4, 2); + expectedResultLen = 2; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 4, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 2); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2); + StringExpr.truncate(outV, i, blanksBoth, 5, blanksBoth.length -5, 1); + expectedResultLen = 1; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 5, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26); + StringExpr.truncate(outV, i, blankRanges, 4, blankRanges.length - 4, 22); + expectedResultLen = 22; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 4, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 22); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24); + StringExpr.truncate(outV, i, blankRanges, 6, blankRanges.length- 6, 7); + expectedResultLen = 7; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 6, expectedResultLen)); + 
Assert.assertTrue(vectorCharacterCount(outV, i) == 7); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23); + StringExpr.truncate(outV, i, blankRanges, 7, blankRanges.length - 7, 20); + expectedResultLen = 20; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 20); + i++; + + // Multi-byte truncation. + byte[] multiByte = new byte[100]; + + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2); + StringExpr.truncate(outV, i, multiByte, 0, 4, 1); + expectedResultLen = 3; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3); + StringExpr.truncate(outV, i, multiByte, 0, 5, 2); + expectedResultLen = 4; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 2); + i++; + addMultiByteCharRightPadded1_3(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4); + StringExpr.truncate(outV, i, multiByte, 0, 9, 2); + expectedResultLen = 5; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 2); + i++; + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2); + StringExpr.truncate(outV, i, multiByte, 3, 2, 1); + expectedResultLen = 1; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + + byte[] sentenceOne = new byte[100]; + int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10); + 
StringExpr.truncate(outV, i, sentenceOne, 0, sentenceOneLen, 8); + expectedResultLen = 20; + Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9); + StringExpr.truncate(outV, i, sentenceOne, 0, sentenceOneLen - 3, 3); + expectedResultLen = 9; + Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen)); + i++; + + byte[] sentenceTwo = new byte[100]; + int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13); + StringExpr.truncate(outV, i, sentenceTwo, 0, sentenceTwoLen, 9); + expectedResultLen = 16; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10); + StringExpr.truncate(outV, i, sentenceTwo, 0, sentenceTwoLen - 5, 6); + expectedResultLen = 11; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen)); + i++; + + int start; + + // Left pad longer strings with multi-byte characters. 
+ byte[] sentenceOnePaddedLeft = new byte[100]; + start = addPads(sentenceOnePaddedLeft, 0, 3); + int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10); + StringExpr.truncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, 4); + expectedResultLen = 6; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9); + StringExpr.truncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, 7); + expectedResultLen = 13; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen)); + i++; + + byte[] sentenceTwoPaddedLeft = new byte[100]; + start = addPads(sentenceTwoPaddedLeft, 0, 2); + int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13); + StringExpr.truncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, 14); + expectedResultLen = 24; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10); + StringExpr.truncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, 9); + expectedResultLen = 15; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen)); + i++; + + // Right pad longer strings with multi-byte characters. 
+ byte[] sentenceOnePaddedRight = new byte[100]; + start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0); + int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4); + StringExpr.truncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, 1); + expectedResultLen = 3; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9); + StringExpr.truncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, 5); + expectedResultLen = 13; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen)); + i++; + + byte[] sentenceTwoPaddedRight = new byte[100]; + start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0); + int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1); + StringExpr.truncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, 6); + expectedResultLen = 11; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10); + StringExpr.truncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, 8); + expectedResultLen = 14; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen)); + i++; + + // Multi-byte characters with blank ranges. 
+ byte[] sentenceBlankRanges = new byte[100]; + int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17); + StringExpr.truncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen, 4); + expectedResultLen = 9; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16); + StringExpr.truncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, 14); + expectedResultLen = 23; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges,0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13); + StringExpr.truncate(outV, i, sentenceBlankRanges, 7, 17, 11); + expectedResultLen = 15; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 11); + i++; + } + + @Test + // Test basic truncate to vector. + public void testTruncateScalar() { + int largeMaxLength = 100; + + byte[] result; + + // No truncate (ASCII) -- maximum length large. 
+ Assert.assertTrue(StringExpr.characterCount(blue) == 4); + result = StringExpr.truncateScalar(blue, largeMaxLength); + Assert.assertTrue(Arrays.equals(blue, result)); + + Assert.assertTrue(StringExpr.characterCount(redgreen) == 8); + result = StringExpr.truncateScalar(redgreen, largeMaxLength); + Assert.assertTrue(Arrays.equals(redgreen, result)); + + Assert.assertTrue(StringExpr.characterCount(ascii_sentence) == 31); + result = StringExpr.truncateScalar(ascii_sentence, largeMaxLength); + Assert.assertTrue(Arrays.equals(ascii_sentence, result)); + + Assert.assertTrue(StringExpr.characterCount(blanksLeft) == 5); + result = StringExpr.truncateScalar(blanksLeft, largeMaxLength); + Assert.assertTrue(Arrays.equals(blanksLeft, result)); + + // No truncate (ASCII) -- same maximum length. + Assert.assertTrue(StringExpr.characterCount(blue) == 4); + result = StringExpr.truncateScalar(blue, blue.length); + Assert.assertTrue(Arrays.equals(blue, result)); + + Assert.assertTrue(StringExpr.characterCount(redgreen) == 8); + result = StringExpr.truncateScalar(redgreen, redgreen.length); + Assert.assertTrue(Arrays.equals(redgreen, result)); + + Assert.assertTrue(StringExpr.characterCount(ascii_sentence) == 31); + result = StringExpr.truncateScalar(ascii_sentence, ascii_sentence.length); + Assert.assertTrue(Arrays.equals(ascii_sentence, result)); + + Assert.assertTrue(StringExpr.characterCount(blanksLeft) == 5); + result = StringExpr.truncateScalar(blanksLeft, blanksLeft.length); + Assert.assertTrue(Arrays.equals(blanksLeft, result)); + + // Simple truncation. 
+ result = StringExpr.truncateScalar(blue, 3); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(blue, 3), result)); + + result = StringExpr.truncateScalar(redgreen, 6); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(redgreen, 6), result)); + + result = StringExpr.truncateScalar(ascii_sentence, 14); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(ascii_sentence, 14), result)); + + result = StringExpr.truncateScalar(blanksLeft, 2); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(blanksLeft, 2), result)); + + result = StringExpr.truncateScalar(blanksRight, 4); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(blanksRight, 4), result)); + + result = StringExpr.truncateScalar(blanksBoth, 2); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(blanksBoth, 2), result)); + + result = StringExpr.truncateScalar(blankString, 1); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(blankString, 1), result)); + + result = StringExpr.truncateScalar(blankRanges, 29); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(blankRanges, 29), result)); + + // Multi-byte truncation. 
+ byte[] scratch = new byte[100]; + byte[] multiByte; + + addMultiByteCharRightPadded1_1(scratch); + multiByte = Arrays.copyOf(scratch, 4); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2); + result = StringExpr.truncateScalar(multiByte, 1); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 3), result)); + + addMultiByteCharRightPadded1_2(scratch); + multiByte = Arrays.copyOf(scratch, 5); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3); + result = StringExpr.truncateScalar(multiByte, 2); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 4), result)); + + addMultiByteCharRightPadded1_3(scratch); + multiByte = Arrays.copyOf(scratch, 9); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4); + result = StringExpr.truncateScalar(multiByte, 2); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 5), result)); + + addMultiByteCharRightPadded1_2(scratch); + multiByte = Arrays.copyOfRange(scratch, 3, 3 + 2); + Assert.assertTrue(StringExpr.characterCount(multiByte) == 2); + result = StringExpr.truncateScalar(multiByte, 1); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 1), result)); + + int sentenceOneLen = addMultiByteCharSentenceOne(scratch, 0); + byte[] sentenceOne = Arrays.copyOf(scratch, sentenceOneLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne) == 10); + result = StringExpr.truncateScalar(sentenceOne, 8); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOne, 20), result)); + + byte[] sentenceOnePortion = Arrays.copyOf(sentenceOne, sentenceOneLen - 3); + Assert.assertTrue(StringExpr.characterCount(sentenceOnePortion) == 9); + result = StringExpr.truncateScalar(sentenceOnePortion, 3); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePortion, 9), result)); + + int sentenceTwoLen = addMultiByteCharSentenceTwo(scratch, 0); + byte[] sentenceTwo = Arrays.copyOf(scratch, sentenceTwoLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo) 
== 13); + result = StringExpr.truncateScalar(sentenceTwo, 9); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwo, 16), result)); + + byte[] sentenceTwoPortion = Arrays.copyOf(sentenceTwo, sentenceTwoLen - 5); + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPortion) == 10); + result = StringExpr.truncateScalar(sentenceTwoPortion, 6); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPortion, 11), result)); + + int start; + + // Left pad longer strings with multi-byte characters. + start = addPads(scratch, 0, 3); + int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(scratch, start); + byte[] sentenceOnePaddedLeft = Arrays.copyOf(scratch, sentenceOnePaddedLeftLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft) == 3 + 10); + result = StringExpr.truncateScalar(sentenceOnePaddedLeft, 4); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedLeft, 6), result)); + + byte[] sentenceOnePaddedLeftPortion = Arrays.copyOf(sentenceOnePaddedLeft, sentenceOnePaddedLeftLen - 3); + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeftPortion) == 3 + 9); + result = StringExpr.truncateScalar(sentenceOnePaddedLeftPortion, 7); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedLeftPortion, 13), result)); + + start = addPads(scratch, 0, 2); + int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(scratch, start); + byte[] sentenceTwoPaddedLeft = Arrays.copyOf(scratch, sentenceTwoPaddedLeftLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft) == 2 + 13); + result = StringExpr.truncateScalar(sentenceTwoPaddedLeft, 14); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedLeft, 24), result)); + + byte[] sentenceTwoPaddedLeftPortion = Arrays.copyOf(sentenceTwoPaddedLeft, sentenceTwoPaddedLeftLen - 5); + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeftPortion) == 2 + 10); + result = StringExpr.truncateScalar(sentenceTwoPaddedLeftPortion, 9); + 
Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedLeftPortion, 15), result)); + + // Right pad longer strings with multi-byte characters. + start = addMultiByteCharSentenceOne(scratch, 0); + int sentenceOnePaddedRightLen = addPads(scratch, start, 4); + byte[] sentenceOnePaddedRight = Arrays.copyOf(scratch, sentenceOnePaddedRightLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight) == 10 + 4); + result = StringExpr.truncateScalar(sentenceOnePaddedRight, 1); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedRight, 3), result)); + + byte[] sentenceOnePaddedRightPortion = Arrays.copyOf(sentenceOnePaddedRight, sentenceOnePaddedRightLen - 3 - 4); + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRightPortion) == 9); + result = StringExpr.truncateScalar(sentenceOnePaddedRightPortion, 5); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedRightPortion, 13), result)); + + start = addMultiByteCharSentenceTwo(scratch, 0); + int sentenceTwoPaddedRightLen = addPads(scratch, start, 1); + byte[] sentenceTwoPaddedRight = Arrays.copyOf(scratch, sentenceTwoPaddedRightLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight) == 13 + 1); + result = StringExpr.truncateScalar(sentenceTwoPaddedRight, 6); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedRight, 11), result)); + + byte[] sentenceTwoPaddedRightPortion = Arrays.copyOf(sentenceTwoPaddedRight, sentenceTwoPaddedRightLen - 5 - 1); + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRightPortion) == 10); + result = StringExpr.truncateScalar(sentenceTwoPaddedRightPortion, 8); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedRightPortion, 14), result)); + + // Multi-byte characters with blank ranges. 
+ int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(scratch, 0); + byte[] sentenceBlankRanges = Arrays.copyOf(scratch, sentenceBlankRangesLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges) == 17); + result = StringExpr.truncateScalar(sentenceBlankRanges, 4); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceBlankRanges, 9), result)); + + byte[] sentenceBlankRangesPortion = Arrays.copyOf(sentenceBlankRanges, sentenceBlankRangesLen - 3); + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRangesPortion) == 16); + result = StringExpr.truncateScalar(sentenceBlankRangesPortion, 14); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceBlankRangesPortion, 23), result)); + + sentenceBlankRangesPortion = Arrays.copyOfRange(sentenceBlankRanges, 7, 7 + 17); + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRangesPortion) == 13); + result = StringExpr.truncateScalar(sentenceBlankRangesPortion, 11); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceBlankRangesPortion, 15), result)); + Assert.assertTrue(StringExpr.characterCount(result) == 11); + } + + @Test + // Test basic right trim and truncate to vector. + public void testRightTrimAndTruncateBytesSlice() { + // This first section repeats the tests of testRightTrimWithOffset with a large maxLength parameter. + // (i.e. too large to have an effect). + int largeMaxLength = 100; + + int resultLen; + // Nothing to trim (ASCII). 
+ Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + resultLen = StringExpr.rightTrimAndTruncate(blue, 0, blue.length, largeMaxLength); + Assert.assertTrue(resultLen == blue.length); + Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4); + + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + resultLen = StringExpr.rightTrimAndTruncate(redgreen, 0, redgreen.length, largeMaxLength); + Assert.assertTrue(resultLen == redgreen.length); + + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + resultLen = StringExpr.rightTrimAndTruncate(ascii_sentence, 0, ascii_sentence.length, largeMaxLength); + Assert.assertTrue(resultLen == ascii_sentence.length); + + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + resultLen = StringExpr.rightTrimAndTruncate(blanksLeft, 0, blanksLeft.length, largeMaxLength); + Assert.assertTrue(resultLen == blanksLeft.length); + + // Simple trims. 
+ Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5); + resultLen = StringExpr.rightTrimAndTruncate(blanksRight, 0, blanksRight.length, largeMaxLength); + Assert.assertTrue(resultLen == 3); + Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 3); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7); + resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 0, blanksBoth.length, largeMaxLength); + Assert.assertTrue(resultLen == 5); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 5); + + Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2); + resultLen = StringExpr.rightTrimAndTruncate(blankString, 0, blankString.length, largeMaxLength); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 0); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30); + resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 0, blankRanges.length, largeMaxLength); + Assert.assertTrue(resultLen == blankRanges.length - 4); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 26); + + // Offset trims. 
+ Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4); + resultLen = StringExpr.rightTrimAndTruncate(blanksRight, 1, blanksRight.length - 1, largeMaxLength); + Assert.assertTrue(resultLen == 2); + Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 2); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3); + resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 4, blanksBoth.length - 4, largeMaxLength); + Assert.assertTrue(resultLen == 1); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 1); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2); + resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 5, blanksBoth.length -5, largeMaxLength); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 0); + + Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1); + resultLen = StringExpr.rightTrimAndTruncate(blankString, 1, blankString.length - 1, largeMaxLength); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blankString, 1, resultLen) == 0); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26); + resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 4, blankRanges.length - 4, largeMaxLength); + Assert.assertTrue(resultLen == blankRanges.length - 4 -4); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, resultLen) == 22); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24); + resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 6, blankRanges.length- 6, largeMaxLength); + Assert.assertTrue(resultLen == blankRanges.length - 6 - 4); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, resultLen) == 20); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 
blankRanges.length - 7) == 23); + resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 7, blankRanges.length - 7, largeMaxLength); + Assert.assertTrue(resultLen == blankRanges.length - 7 - 4); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 19); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1); + resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 7, 8 - 7, largeMaxLength); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 0); + + // Multi-byte trims. + byte[] multiByte = new byte[100]; + + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2); + resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 4, largeMaxLength); + Assert.assertTrue(resultLen == 3); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 1); + + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3); + resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 5, largeMaxLength); + Assert.assertTrue(resultLen == 4); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2); + + addMultiByteCharRightPadded1_3(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4); + resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 9, largeMaxLength); + Assert.assertTrue(resultLen == 8); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 3); + + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1); + resultLen = StringExpr.rightTrimAndTruncate(multiByte, 3, 1, largeMaxLength); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 0); + + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2); + resultLen = 
StringExpr.rightTrimAndTruncate(multiByte, 3, 2, largeMaxLength); + Assert.assertTrue(resultLen == 1); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, resultLen) == 1); + + byte[] sentenceOne = new byte[100]; + int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOne, 0, sentenceOneLen, largeMaxLength); + Assert.assertTrue(resultLen == sentenceOneLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOne, 0, sentenceOneLen - 3, largeMaxLength); + Assert.assertTrue(resultLen == sentenceOneLen - 3); + + byte[] sentenceTwo = new byte[100]; + int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwo, 0, sentenceTwoLen, largeMaxLength); + Assert.assertTrue(resultLen == sentenceTwoLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwo, 0, sentenceTwoLen - 5, largeMaxLength); + Assert.assertTrue(resultLen == sentenceTwoLen - 5); + + int start; + + // Left pad longer strings with multi-byte characters. 
+ byte[] sentenceOnePaddedLeft = new byte[100]; + start = addPads(sentenceOnePaddedLeft, 0, 3); + int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, largeMaxLength); + Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, largeMaxLength); + Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen - 3); + + byte[] sentenceTwoPaddedLeft = new byte[100]; + start = addPads(sentenceTwoPaddedLeft, 0, 2); + int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, largeMaxLength); + Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, largeMaxLength); + Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen - 5); + + // Right pad longer strings with multi-byte characters. 
+ byte[] sentenceOnePaddedRight = new byte[100]; + start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0); + int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, largeMaxLength); + Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 4); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, largeMaxLength); + Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 3 - 4); + + byte[] sentenceTwoPaddedRight = new byte[100]; + start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0); + int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, largeMaxLength); + Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 1); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, largeMaxLength); + Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 5 - 1); + + // Multi-byte characters with blank ranges. 
+ byte[] sentenceBlankRanges = new byte[100]; + int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17); + resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 0, sentenceBlankRangesLen, largeMaxLength); + Assert.assertTrue(resultLen == sentenceBlankRangesLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16); + resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, largeMaxLength); + Assert.assertTrue(resultLen == sentenceBlankRangesLen - 3); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13); + resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 7, 17, largeMaxLength); + Assert.assertTrue(resultLen == 12); + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, resultLen) == 8); + + // This next section repeats the tests of testRightTrimWithOffset with a maxLength parameter that is + // exactly the number of current characters in the string. This shouldn't affect the trim. + + // Nothing to trim (ASCII). 
+ Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + resultLen = StringExpr.rightTrimAndTruncate(blue, 0, blue.length, 4); + Assert.assertTrue(resultLen == blue.length); + Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 4); + + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + resultLen = StringExpr.rightTrimAndTruncate(redgreen, 0, redgreen.length, 8); + Assert.assertTrue(resultLen == redgreen.length); + + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + resultLen = StringExpr.rightTrimAndTruncate(ascii_sentence, 0, ascii_sentence.length, 31); + Assert.assertTrue(resultLen == ascii_sentence.length); + + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + resultLen = StringExpr.rightTrimAndTruncate(blanksLeft, 0, blanksLeft.length, 5); + Assert.assertTrue(resultLen == blanksLeft.length); + + // Simple trims. + Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5); + resultLen = StringExpr.rightTrimAndTruncate(blanksRight, 0, blanksRight.length, 5); + Assert.assertTrue(resultLen == 3); + Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 3); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7); + resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 0, blanksBoth.length, 7); + Assert.assertTrue(resultLen == 5); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 5); + + Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2); + resultLen = StringExpr.rightTrimAndTruncate(blankString, 0, blankString.length, 2); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 0); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30); + resultLen = 
StringExpr.rightTrimAndTruncate(blankRanges, 0, blankRanges.length, 30); + Assert.assertTrue(resultLen == blankRanges.length - 4); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 26); + + // Offset trims. + Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4); + resultLen = StringExpr.rightTrimAndTruncate(blanksRight, 1, blanksRight.length - 1, 4); + Assert.assertTrue(resultLen == 2); + Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 2); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3); + resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 4, blanksBoth.length - 4, 3); + Assert.assertTrue(resultLen == 1); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 1); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2); + resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 5, blanksBoth.length -5, 2); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 0); + + Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1); + resultLen = StringExpr.rightTrimAndTruncate(blankString, 1, blankString.length - 1, 1); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blankString, 1, resultLen) == 0); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26); + resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 4, blankRanges.length - 4, 26); + Assert.assertTrue(resultLen == blankRanges.length - 4 -4); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, resultLen) == 22); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24); + resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 6, blankRanges.length- 6, 24); + Assert.assertTrue(resultLen == blankRanges.length - 6 
- 4); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, resultLen) == 20); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23); + resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 7, blankRanges.length - 7, 23); + Assert.assertTrue(resultLen == blankRanges.length - 7 - 4); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 19); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1); + resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 7, 8 - 7, 1); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 0); + + // Multi-byte trims. + multiByte = new byte[100]; + + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2); + resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 4, 2); + Assert.assertTrue(resultLen == 3); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 1); + + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3); + resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 5, 3); + Assert.assertTrue(resultLen == 4); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2); + + addMultiByteCharRightPadded1_3(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4); + resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 9, 4); + Assert.assertTrue(resultLen == 8); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 3); + + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1); + resultLen = StringExpr.rightTrimAndTruncate(multiByte, 3, 1, 1); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 0); + + addMultiByteCharRightPadded1_2(multiByte); + 
Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2); + resultLen = StringExpr.rightTrimAndTruncate(multiByte, 3, 2, 2); + Assert.assertTrue(resultLen == 1); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, resultLen) == 1); + + sentenceOne = new byte[100]; + sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOne, 0, sentenceOneLen, 10); + Assert.assertTrue(resultLen == sentenceOneLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOne, 0, sentenceOneLen - 3, 9); + Assert.assertTrue(resultLen == sentenceOneLen - 3); + + sentenceTwo = new byte[100]; + sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwo, 0, sentenceTwoLen, 13); + Assert.assertTrue(resultLen == sentenceTwoLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwo, 0, sentenceTwoLen - 5, 10); + Assert.assertTrue(resultLen == sentenceTwoLen - 5); + + // Left pad longer strings with multi-byte characters. 
+ sentenceOnePaddedLeft = new byte[100]; + start = addPads(sentenceOnePaddedLeft, 0, 3); + sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, 3 + 10); + Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, 3 + 9); + Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen - 3); + + sentenceTwoPaddedLeft = new byte[100]; + start = addPads(sentenceTwoPaddedLeft, 0, 2); + sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, 2 + 13); + Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, 2 + 10); + Assert.assertTrue(resultLen == sentenceTwoPaddedLeftLen - 5); + + // Right pad longer strings with multi-byte characters. 
+ sentenceOnePaddedRight = new byte[100]; + start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0); + sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, 10 + 4); + Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 4); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, 9); + Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 3 - 4); + + sentenceTwoPaddedRight = new byte[100]; + start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0); + sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, 13 + 1); + Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 1); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, 10); + Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 5 - 1); + + // Multi-byte characters with blank ranges. 
+ sentenceBlankRanges = new byte[100]; + sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17); + resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 0, sentenceBlankRangesLen, 17); + Assert.assertTrue(resultLen == sentenceBlankRangesLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16); + resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, 16); + Assert.assertTrue(resultLen == sentenceBlankRangesLen - 3); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13); + resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 7, 17, largeMaxLength); + Assert.assertTrue(resultLen == 12); + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, resultLen) == 8); + + // This next section repeats the tests of testRightTrimWithOffset with a maxLength parameter that is + // less than the number of current characters in the string and thus affects the trim. + + // Nothing to trim (ASCII). 
+ Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + resultLen = StringExpr.rightTrimAndTruncate(blue, 0, blue.length, 3); + Assert.assertTrue(resultLen == 3); + Assert.assertTrue(StringExpr.characterCount(blue, 0, resultLen) == 3); + + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + resultLen = StringExpr.rightTrimAndTruncate(redgreen, 0, redgreen.length, 6); + Assert.assertTrue(resultLen == 6); + + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + resultLen = StringExpr.rightTrimAndTruncate(ascii_sentence, 0, ascii_sentence.length, 30); + Assert.assertTrue(resultLen == 30); + + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + resultLen = StringExpr.rightTrimAndTruncate(blanksLeft, 0, blanksLeft.length, 1); + Assert.assertTrue(resultLen == 0); + + // Simple trims. + Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5); + resultLen = StringExpr.rightTrimAndTruncate(blanksRight, 0, blanksRight.length, 4); + Assert.assertTrue(resultLen == 3); + Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, resultLen) == 3); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7); + resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 0, blanksBoth.length, 6); + Assert.assertTrue(resultLen == 5); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, resultLen) == 5); + + Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2); + resultLen = StringExpr.rightTrimAndTruncate(blankString, 0, blankString.length, 1); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blankString, 0, resultLen) == 0); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30); + resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 0, blankRanges.length, 19); + 
Assert.assertTrue(resultLen == 15); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, resultLen) == 15); + + // Offset trims. + Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4); + resultLen = StringExpr.rightTrimAndTruncate(blanksRight, 1, blanksRight.length - 1, 3); + Assert.assertTrue(resultLen == 2); + Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, resultLen) == 2); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3); + resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 4, blanksBoth.length - 4, 2); + Assert.assertTrue(resultLen == 1); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, resultLen) == 1); + + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2); + resultLen = StringExpr.rightTrimAndTruncate(blanksBoth, 5, blanksBoth.length -5, 1); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, resultLen) == 0); + + Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1); + resultLen = StringExpr.rightTrimAndTruncate(blankString, 1, blankString.length - 1, 1); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blankString, 1, resultLen) == 0); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 3, 6) == 6); + resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 3, 6, 5); + Assert.assertTrue(resultLen == 4); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 3, resultLen) == 4); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24); + resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 6, blankRanges.length- 6, 22); + Assert.assertTrue(resultLen == blankRanges.length - 6 - 4); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, resultLen) == 20); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 
blankRanges.length - 7) == 23); + resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 7, blankRanges.length - 7, 10); + Assert.assertTrue(resultLen == 8); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 8); + + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1); + resultLen = StringExpr.rightTrimAndTruncate(blankRanges, 7, 8 - 7, 1); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, resultLen) == 0); + + // Multi-byte trims. + multiByte = new byte[100]; + + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2); + resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 4, 1); + Assert.assertTrue(resultLen == 3); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 1); + + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3); + resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 5, 2); + Assert.assertTrue(resultLen == 4); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 2); + + addMultiByteCharRightPadded1_3(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4); + resultLen = StringExpr.rightTrimAndTruncate(multiByte, 0, 9, 3); + Assert.assertTrue(resultLen == 8); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 3); + + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1); + resultLen = StringExpr.rightTrimAndTruncate(multiByte, 3, 1, 1); + Assert.assertTrue(resultLen == 0); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, resultLen) == 0); + + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2); + resultLen = StringExpr.rightTrimAndTruncate(multiByte, 3, 2, 1); + Assert.assertTrue(resultLen == 1); + 
Assert.assertTrue(StringExpr.characterCount(multiByte, 3, resultLen) == 1); + + sentenceOne = new byte[100]; + sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOne, 0, sentenceOneLen, 7); + Assert.assertTrue(resultLen == sentenceOneLen - 9); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOne, 0, sentenceOneLen - 3, 6); + Assert.assertTrue(resultLen == 13); + + sentenceTwo = new byte[100]; + sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwo, 0, sentenceTwoLen, 13); + Assert.assertTrue(resultLen == sentenceTwoLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwo, 0, sentenceTwoLen - 5, 10); + Assert.assertTrue(resultLen == sentenceTwoLen - 5); + + // Left pad longer strings with multi-byte characters. 
+ sentenceOnePaddedLeft = new byte[100]; + start = addPads(sentenceOnePaddedLeft, 0, 3); + sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, 3 + 8); + Assert.assertTrue(resultLen == sentenceOnePaddedLeftLen - 6); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, 3 + 6); + Assert.assertTrue(resultLen == 16); + + sentenceTwoPaddedLeft = new byte[100]; + start = addPads(sentenceTwoPaddedLeft, 0, 2); + sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, 7); + Assert.assertTrue(resultLen == 10); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, 6); + Assert.assertTrue(resultLen == 10); + + // Right pad longer strings with multi-byte characters. 
+ sentenceOnePaddedRight = new byte[100]; + start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0); + sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, 10); + Assert.assertTrue(resultLen == sentenceOnePaddedRightLen - 4); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9); + resultLen = StringExpr.rightTrimAndTruncate(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, 7); + Assert.assertTrue(resultLen == 17); + + sentenceTwoPaddedRight = new byte[100]; + start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0); + sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, 13); + Assert.assertTrue(resultLen == sentenceTwoPaddedRightLen - 1); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10); + resultLen = StringExpr.rightTrimAndTruncate(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, 4); + Assert.assertTrue(resultLen == 8); + + // Multi-byte characters with blank ranges. 
+ sentenceBlankRanges = new byte[100]; + sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17); + resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 0, sentenceBlankRangesLen, 4); + Assert.assertTrue(resultLen == 7); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16); + resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, 6); + Assert.assertTrue(resultLen == 11); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 4, 12) == 8); + resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 4, 12, 6); + Assert.assertTrue(resultLen == 7); + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 4, resultLen) == 5); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13); + resultLen = StringExpr.rightTrimAndTruncate(sentenceBlankRanges, 7, 17, 11); + Assert.assertTrue(resultLen == 12); + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, resultLen) == 8); + + } + + @Test + // Test basic right trim and truncate to vector. + public void testRightTrimAndTruncateBytesColumnVector() { + BytesColumnVector outV = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + outV.initBuffer(35); // initialize with estimated element size 35 + + int i = 0; + + // This first section repeats the tests of testRightTrimWithOffset with a large maxLength parameter. + // (i.e. too large to have an effect). + int largeMaxLength = 100; + + int expectedResultLen; + // Nothing to trim (ASCII). 
+ Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + StringExpr.rightTrimAndTruncate(outV, i, blue, 0, blue.length, largeMaxLength); + expectedResultLen = blue.length; + Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + StringExpr.rightTrimAndTruncate(outV, i, redgreen, 0, redgreen.length, largeMaxLength); + expectedResultLen = redgreen.length; + Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + StringExpr.rightTrimAndTruncate(outV, i, ascii_sentence, 0, ascii_sentence.length, largeMaxLength); + expectedResultLen = ascii_sentence.length; + Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + StringExpr.rightTrimAndTruncate(outV, i, blanksLeft, 0, blanksLeft.length, largeMaxLength); + expectedResultLen = blanksLeft.length; + Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen)); + i++; + + // Simple trims. 
+ Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5); + StringExpr.rightTrimAndTruncate(outV, i, blanksRight, 0, blanksRight.length, largeMaxLength); + expectedResultLen = 3; + Assert.assertTrue(vectorEqual(outV, i, blanksRight, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 3); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7); + StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 0, blanksBoth.length, largeMaxLength); + expectedResultLen = 5; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 5); + i++; + Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2); + StringExpr.rightTrimAndTruncate(outV, i, blankString, 0, blankString.length, largeMaxLength); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blankString, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 0, blankRanges.length, largeMaxLength); + expectedResultLen = blankRanges.length - 4; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 26); + i++; + // Offset trims. 
+ Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4); + StringExpr.rightTrimAndTruncate(outV, i, blanksRight, 1, blanksRight.length - 1, largeMaxLength); + expectedResultLen = 2; + Assert.assertTrue(vectorEqual(outV, i, blanksRight, 1, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 2); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3); + StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 4, blanksBoth.length - 4, largeMaxLength); + expectedResultLen = 1; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 4, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2); + StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 5, blanksBoth.length -5, largeMaxLength); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 5, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1); + StringExpr.rightTrimAndTruncate(outV, i, blankString, 1, blankString.length - 1, largeMaxLength); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blankString, 1, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 4, blankRanges.length - 4, largeMaxLength); + expectedResultLen = blankRanges.length - 4 -4; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 4, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 22); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 6, blankRanges.length- 6, 
largeMaxLength); + expectedResultLen = blankRanges.length - 6 - 4; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 6, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 20); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 7, blankRanges.length - 7, largeMaxLength); + expectedResultLen = blankRanges.length - 7 - 4; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 19); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 7, 8 - 7, largeMaxLength); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + // Multi-byte trims. + byte[] multiByte = new byte[100]; + + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 4, largeMaxLength); + expectedResultLen = 3; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 5, largeMaxLength); + expectedResultLen = 4; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 2); + i++; + addMultiByteCharRightPadded1_3(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 9, largeMaxLength); + expectedResultLen = 8; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + 
Assert.assertTrue(vectorCharacterCount(outV, i) == 3); + i++; + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 3, 1, largeMaxLength); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 3, 2, largeMaxLength); + expectedResultLen = 1; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + byte[] sentenceOne = new byte[100]; + int sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOne, 0, sentenceOneLen, largeMaxLength); + expectedResultLen = sentenceOneLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOne, 0, sentenceOneLen - 3, largeMaxLength); + expectedResultLen = sentenceOneLen - 3; + Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen)); + i++; + + byte[] sentenceTwo = new byte[100]; + int sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwo, 0, sentenceTwoLen, largeMaxLength); + expectedResultLen = sentenceTwoLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10); + 
StringExpr.rightTrimAndTruncate(outV, i, sentenceTwo, 0, sentenceTwoLen - 5, largeMaxLength); + expectedResultLen = sentenceTwoLen - 5; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen)); + i++; + + int start; + + // Left pad longer strings with multi-byte characters. + byte[] sentenceOnePaddedLeft = new byte[100]; + start = addPads(sentenceOnePaddedLeft, 0, 3); + int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, largeMaxLength); + expectedResultLen = sentenceOnePaddedLeftLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, largeMaxLength); + expectedResultLen = sentenceOnePaddedLeftLen - 3; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen)); + i++; + byte[] sentenceTwoPaddedLeft = new byte[100]; + start = addPads(sentenceTwoPaddedLeft, 0, 2); + int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, largeMaxLength); + expectedResultLen = sentenceTwoPaddedLeftLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, 
largeMaxLength); + expectedResultLen = sentenceTwoPaddedLeftLen - 5; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen)); + i++; + + // Right pad longer strings with multi-byte characters. + byte[] sentenceOnePaddedRight = new byte[100]; + start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0); + int sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, largeMaxLength); + expectedResultLen = sentenceOnePaddedRightLen - 4; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, largeMaxLength); + expectedResultLen = sentenceOnePaddedRightLen - 3 - 4; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen)); + i++; + byte[] sentenceTwoPaddedRight = new byte[100]; + start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0); + int sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, largeMaxLength); + expectedResultLen = sentenceTwoPaddedRightLen - 1; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, largeMaxLength); + 
expectedResultLen = sentenceTwoPaddedRightLen - 5 - 1; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen)); + i++; + // Multi-byte characters with blank ranges. + byte[] sentenceBlankRanges = new byte[100]; + int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17); + StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen, largeMaxLength); + expectedResultLen = sentenceBlankRangesLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16); + StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, largeMaxLength); + expectedResultLen = sentenceBlankRangesLen - 3; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13); + StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 7, 17, largeMaxLength); + expectedResultLen = 12; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 8); + i++; + + // This next section repeats the tests of testRightTrimWithOffset with a maxLength parameter that is + // exactly the number of current characters in the string. This shouldn't affect the trim. + + // Nothing to trim (ASCII). 
+ Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + StringExpr.rightTrimAndTruncate(outV, i, blue, 0, blue.length, 4); + expectedResultLen = blue.length; + Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 4); + i++; + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + StringExpr.rightTrimAndTruncate(outV, i, redgreen, 0, redgreen.length, 8); + expectedResultLen = redgreen.length; + Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + StringExpr.rightTrimAndTruncate(outV, i, ascii_sentence, 0, ascii_sentence.length, 31); + expectedResultLen = ascii_sentence.length; + Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + StringExpr.rightTrimAndTruncate(outV, i, blanksLeft, 0, blanksLeft.length, 5); + expectedResultLen = blanksLeft.length; + Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen)); + i++; + + // Simple trims. 
+ Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5); + StringExpr.rightTrimAndTruncate(outV, i, blanksRight, 0, blanksRight.length, 5); + expectedResultLen = 3; + Assert.assertTrue(vectorEqual(outV, i, blanksRight, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 3); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7); + StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 0, blanksBoth.length, 7); + expectedResultLen = 5; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 5); + i++; + Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2); + StringExpr.rightTrimAndTruncate(outV, i, blankString, 0, blankString.length, 2); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blankString, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 0, blankRanges.length, 30); + expectedResultLen = blankRanges.length - 4; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 26); + i++; + + // Offset trims. 
+ Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4); + StringExpr.rightTrimAndTruncate(outV, i, blanksRight, 1, blanksRight.length - 1, 4); + expectedResultLen = 2; + Assert.assertTrue(vectorEqual(outV, i, blanksRight, 1, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 2); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3); + StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 4, blanksBoth.length - 4, 3); + expectedResultLen = 1; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 4, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2); + StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 5, blanksBoth.length -5, 2); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 5, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1); + StringExpr.rightTrimAndTruncate(outV, i, blankString, 1, blankString.length - 1, 1); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blankString, 1, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 4, blankRanges.length - 4) == 26); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 4, blankRanges.length - 4, 26); + expectedResultLen = blankRanges.length - 4 -4; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 4, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 22); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 6, blankRanges.length- 6, 24); + expectedResultLen = blankRanges.length - 6 - 4; + 
Assert.assertTrue(vectorEqual(outV, i, blankRanges, 6, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 20); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 7, blankRanges.length - 7, 23); + expectedResultLen = blankRanges.length - 7 - 4; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 19); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 7, 8 - 7, 1); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + + // Multi-byte trims. + multiByte = new byte[100]; + + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 4, 2); + expectedResultLen = 3; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 5, 3); + expectedResultLen = 4; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 2); + i++; + addMultiByteCharRightPadded1_3(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 9, 4); + expectedResultLen = 8; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 3); + i++; + addMultiByteCharRightPadded1_1(multiByte); + 
Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 3, 1, 1); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 3, 2, 2); + expectedResultLen = 1; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + + sentenceOne = new byte[100]; + sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOne, 0, sentenceOneLen, 10); + expectedResultLen = sentenceOneLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOne, 0, sentenceOneLen - 3, 9); + expectedResultLen = sentenceOneLen - 3; + Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen)); + i++; + + sentenceTwo = new byte[100]; + sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwo, 0, sentenceTwoLen, 13); + expectedResultLen = sentenceTwoLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwo, 0, sentenceTwoLen - 5, 10); + expectedResultLen = sentenceTwoLen - 5; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, 
expectedResultLen)); + i++; + + // Left pad longer strings with multi-byte characters. + sentenceOnePaddedLeft = new byte[100]; + start = addPads(sentenceOnePaddedLeft, 0, 3); + sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, 3 + 10); + expectedResultLen = sentenceOnePaddedLeftLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, 3 + 9); + expectedResultLen = sentenceOnePaddedLeftLen - 3; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen)); + i++; + sentenceTwoPaddedLeft = new byte[100]; + start = addPads(sentenceTwoPaddedLeft, 0, 2); + sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, 2 + 13); + expectedResultLen = sentenceTwoPaddedLeftLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, 2 + 10); + expectedResultLen = sentenceTwoPaddedLeftLen - 5; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen)); + i++; + + // Right pad longer strings with multi-byte characters. 
+ sentenceOnePaddedRight = new byte[100]; + start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0); + sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, 10 + 4); + expectedResultLen = sentenceOnePaddedRightLen - 4; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, 9); + expectedResultLen = sentenceOnePaddedRightLen - 3 - 4; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen)); + i++; + + sentenceTwoPaddedRight = new byte[100]; + start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0); + sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, 13 + 1); + expectedResultLen = sentenceTwoPaddedRightLen - 1; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, 10); + expectedResultLen = sentenceTwoPaddedRightLen - 5 - 1; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen)); + i++; + + // Multi-byte characters with blank ranges. 
+ sentenceBlankRanges = new byte[100]; + sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17); + StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen, 17); + expectedResultLen = sentenceBlankRangesLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16); + StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, 16); + expectedResultLen = sentenceBlankRangesLen - 3; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13); + StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 7, 17, largeMaxLength); + expectedResultLen = 12; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 8); + i++; + + // This next section repeats the tests of testRightTrimWithOffset with a maxLength parameter that is + // less than the number of current characters in the string and thus affects the trim. + + // Nothing to trim (ASCII). 
+ Assert.assertTrue(StringExpr.characterCount(blue, 0, blue.length) == 4); + StringExpr.rightTrimAndTruncate(outV, i, blue, 0, blue.length, 3); + expectedResultLen = 3; + Assert.assertTrue(vectorEqual(outV, i, blue, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 3); + i++; + Assert.assertTrue(StringExpr.characterCount(redgreen, 0, redgreen.length) == 8); + StringExpr.rightTrimAndTruncate(outV, i, redgreen, 0, redgreen.length, 6); + expectedResultLen = 6; + Assert.assertTrue(vectorEqual(outV, i, redgreen, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(ascii_sentence, 0, ascii_sentence.length) == 31); + StringExpr.rightTrimAndTruncate(outV, i, ascii_sentence, 0, ascii_sentence.length, 30); + expectedResultLen = 30; + Assert.assertTrue(vectorEqual(outV, i, ascii_sentence, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksLeft, 0, blanksLeft.length) == 5); + StringExpr.rightTrimAndTruncate(outV, i, blanksLeft, 0, blanksLeft.length, 1); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blanksLeft, 0, expectedResultLen)); + i++; + + // Simple trims. 
+ Assert.assertTrue(StringExpr.characterCount(blanksRight, 0, blanksRight.length) == 5); + StringExpr.rightTrimAndTruncate(outV, i, blanksRight, 0, blanksRight.length, 4); + expectedResultLen = 3; + Assert.assertTrue(vectorEqual(outV, i, blanksRight, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 3); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 0, blanksBoth.length) == 7); + StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 0, blanksBoth.length, 6); + expectedResultLen = 5; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 5); + i++; + Assert.assertTrue(StringExpr.characterCount(blankString, 0, blankString.length) == 2); + StringExpr.rightTrimAndTruncate(outV, i, blankString, 0, blankString.length, 1); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blankString, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 0, blankRanges.length) == 30); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 0, blankRanges.length, 19); + expectedResultLen = 15; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 15); + i++; + + // Offset trims. 
+ Assert.assertTrue(StringExpr.characterCount(blanksRight, 1, blanksRight.length - 1) == 4); + StringExpr.rightTrimAndTruncate(outV, i, blanksRight, 1, blanksRight.length - 1, 3); + expectedResultLen = 2; + Assert.assertTrue(vectorEqual(outV, i, blanksRight, 1, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 2); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 4, blanksBoth.length - 4) == 3); + StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 4, blanksBoth.length - 4, 2); + expectedResultLen = 1; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 4, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + Assert.assertTrue(StringExpr.characterCount(blanksBoth, 5, blanksBoth.length - 5) == 2); + StringExpr.rightTrimAndTruncate(outV, i, blanksBoth, 5, blanksBoth.length -5, 1); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blanksBoth, 5, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + Assert.assertTrue(StringExpr.characterCount(blankString, 1, blankString.length - 1) == 1); + StringExpr.rightTrimAndTruncate(outV, i, blankString, 1, blankString.length - 1, 1); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blankString, 1, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 3, 6) == 6); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 3, 6, 5); + expectedResultLen = 4; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 3, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 4); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 6, blankRanges.length - 6) == 24); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 6, blankRanges.length- 6, 22); + expectedResultLen = blankRanges.length - 6 - 4; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 6, expectedResultLen)); + 
Assert.assertTrue(vectorCharacterCount(outV, i) == 20); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, blankRanges.length - 7) == 23); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 7, blankRanges.length - 7, 10); + expectedResultLen = 8; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 8); + i++; + Assert.assertTrue(StringExpr.characterCount(blankRanges, 7, 8 - 7) == 1); + StringExpr.rightTrimAndTruncate(outV, i, blankRanges, 7, 8 - 7, 1); + expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, blankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + + // Multi-byte trims. + multiByte = new byte[100]; + + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 4, 1); + expectedResultLen = 3; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 5, 2); + expectedResultLen = 4; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 2); + i++; + addMultiByteCharRightPadded1_3(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 0, 9, 3); + expectedResultLen = 8; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 0, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 3); + i++; + addMultiByteCharRightPadded1_1(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 1) == 1); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 3, 1, 1); + 
expectedResultLen = 0; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 0); + i++; + addMultiByteCharRightPadded1_2(multiByte); + Assert.assertTrue(StringExpr.characterCount(multiByte, 3, 2) == 2); + StringExpr.rightTrimAndTruncate(outV, i, multiByte, 3, 2, 1); + expectedResultLen = 1; + Assert.assertTrue(vectorEqual(outV, i, multiByte, 3, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 1); + i++; + + sentenceOne = new byte[100]; + sentenceOneLen = addMultiByteCharSentenceOne(sentenceOne, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen) == 10); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOne, 0, sentenceOneLen, 7); + expectedResultLen = sentenceOneLen - 9; + Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOne, 0, sentenceOneLen - 3) == 9); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOne, 0, sentenceOneLen - 3, 6); + expectedResultLen = 13; + Assert.assertTrue(vectorEqual(outV, i, sentenceOne, 0, expectedResultLen)); + i++; + + sentenceTwo = new byte[100]; + sentenceTwoLen = addMultiByteCharSentenceTwo(sentenceTwo, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen) == 13); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwo, 0, sentenceTwoLen, 13); + expectedResultLen = sentenceTwoLen; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwo, 0, sentenceTwoLen- 5) == 10); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwo, 0, sentenceTwoLen - 5, 10); + expectedResultLen = sentenceTwoLen - 5; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwo, 0, expectedResultLen)); + i++; + + // Left pad longer strings with multi-byte characters. 
+ sentenceOnePaddedLeft = new byte[100]; + start = addPads(sentenceOnePaddedLeft, 0, 3); + sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(sentenceOnePaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen) == 3 + 10); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen, 3 + 8); + expectedResultLen = sentenceOnePaddedLeftLen - 6; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3) == 3 + 9); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedLeft, 0, sentenceOnePaddedLeftLen - 3, 3 + 6); + expectedResultLen = 16; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedLeft, 0, expectedResultLen)); + i++; + + sentenceTwoPaddedLeft = new byte[100]; + start = addPads(sentenceTwoPaddedLeft, 0, 2); + sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(sentenceTwoPaddedLeft, start); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen) == 2 + 13); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen, 7); + expectedResultLen = 10; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5) == 2 + 10); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedLeft, 0, sentenceTwoPaddedLeftLen - 5, 6); + expectedResultLen = 10; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedLeft, 0, expectedResultLen)); + i++; + + // Right pad longer strings with multi-byte characters. 
+ sentenceOnePaddedRight = new byte[100]; + start = addMultiByteCharSentenceOne(sentenceOnePaddedRight, 0); + sentenceOnePaddedRightLen = addPads(sentenceOnePaddedRight, start, 4); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen) == 10 + 4); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen, 10); + expectedResultLen = sentenceOnePaddedRightLen - 4; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4) == 9); + StringExpr.rightTrimAndTruncate(outV, i, sentenceOnePaddedRight, 0, sentenceOnePaddedRightLen - 3 - 4, 7); + expectedResultLen = 17; + Assert.assertTrue(vectorEqual(outV, i, sentenceOnePaddedRight, 0, expectedResultLen)); + i++; + + sentenceTwoPaddedRight = new byte[100]; + start = addMultiByteCharSentenceTwo(sentenceTwoPaddedRight, 0); + sentenceTwoPaddedRightLen = addPads(sentenceTwoPaddedRight, start, 1); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen) == 13 + 1); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen, 13); + expectedResultLen = sentenceTwoPaddedRightLen - 1; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1) == 10); + StringExpr.rightTrimAndTruncate(outV, i, sentenceTwoPaddedRight, 0, sentenceTwoPaddedRightLen - 5 - 1, 4); + expectedResultLen = 8; + Assert.assertTrue(vectorEqual(outV, i, sentenceTwoPaddedRight, 0, expectedResultLen)); + i++; + + // Multi-byte characters with blank ranges. 
+ sentenceBlankRanges = new byte[100]; + sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(sentenceBlankRanges, 0); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen) == 17); + StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen, 4); + expectedResultLen = 7; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 0, sentenceBlankRangesLen - 3) == 16); + StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 0, sentenceBlankRangesLen - 3, 6); + expectedResultLen = 11; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 0, expectedResultLen)); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 4, 12) == 8); + StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 4, 12, 6); + expectedResultLen = 7; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 4, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 5); + i++; + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges, 7, 17) == 13); + StringExpr.rightTrimAndTruncate(outV, i, sentenceBlankRanges, 7, 17, 11); + expectedResultLen = 12; + Assert.assertTrue(vectorEqual(outV, i, sentenceBlankRanges, 7, expectedResultLen)); + Assert.assertTrue(vectorCharacterCount(outV, i) == 8); + i++; + } + + @Test + // Test basic truncate to vector. + public void testRightTrimAndTruncateScalar() { + int largeMaxLength = 100; + + byte[] result; + + // No truncate (ASCII) -- maximum length large. 
+ Assert.assertTrue(StringExpr.characterCount(blue) == 4); + result = StringExpr.rightTrimAndTruncateScalar(blue, largeMaxLength); + Assert.assertTrue(Arrays.equals(blue, result)); + + Assert.assertTrue(StringExpr.characterCount(redgreen) == 8); + result = StringExpr.rightTrimAndTruncateScalar(redgreen, largeMaxLength); + Assert.assertTrue(Arrays.equals(redgreen, result)); + + Assert.assertTrue(StringExpr.characterCount(ascii_sentence) == 31); + result = StringExpr.rightTrimAndTruncateScalar(ascii_sentence, largeMaxLength); + Assert.assertTrue(Arrays.equals(ascii_sentence, result)); + + Assert.assertTrue(StringExpr.characterCount(blanksLeft) == 5); + result = StringExpr.rightTrimAndTruncateScalar(blanksLeft, largeMaxLength); + Assert.assertTrue(Arrays.equals(blanksLeft, result)); + + // No truncate (ASCII) -- same maximum length. + Assert.assertTrue(StringExpr.characterCount(blue) == 4); + result = StringExpr.rightTrimAndTruncateScalar(blue, blue.length); + Assert.assertTrue(Arrays.equals(blue, result)); + + Assert.assertTrue(StringExpr.characterCount(redgreen) == 8); + result = StringExpr.rightTrimAndTruncateScalar(redgreen, redgreen.length); + Assert.assertTrue(Arrays.equals(redgreen, result)); + + Assert.assertTrue(StringExpr.characterCount(ascii_sentence) == 31); + result = StringExpr.rightTrimAndTruncateScalar(ascii_sentence, ascii_sentence.length); + Assert.assertTrue(Arrays.equals(ascii_sentence, result)); + + Assert.assertTrue(StringExpr.characterCount(blanksLeft) == 5); + result = StringExpr.rightTrimAndTruncateScalar(blanksLeft, blanksLeft.length); + Assert.assertTrue(Arrays.equals(blanksLeft, result)); + + // Simple truncation. 
+ result = StringExpr.rightTrimAndTruncateScalar(blue, 3); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(blue, 3), result)); + + result = StringExpr.rightTrimAndTruncateScalar(redgreen, 6); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(redgreen, 6), result)); + + result = StringExpr.rightTrimAndTruncateScalar(ascii_sentence, 14); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(ascii_sentence, 14), result)); + + result = StringExpr.rightTrimAndTruncateScalar(blanksLeft, 2); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(blanksLeft, 0), result)); + + result = StringExpr.rightTrimAndTruncateScalar(blanksRight, 4); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(blanksRight, 3), result)); + + result = StringExpr.rightTrimAndTruncateScalar(blanksBoth, 2); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(blanksBoth, 0), result)); + + result = StringExpr.rightTrimAndTruncateScalar(blankString, 1); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(blankString, 0), result)); + + result = StringExpr.rightTrimAndTruncateScalar(blankRanges, 29); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(blankRanges, 26), result)); + + // Multi-byte truncation. 
+ byte[] scratch = new byte[100]; + byte[] multiByte; + + addMultiByteCharRightPadded1_1(scratch); + multiByte = Arrays.copyOf(scratch, 4); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 4) == 2); + result = StringExpr.rightTrimAndTruncateScalar(multiByte, 1); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 3), result)); + + addMultiByteCharRightPadded1_2(scratch); + multiByte = Arrays.copyOf(scratch, 5); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 5) == 3); + result = StringExpr.rightTrimAndTruncateScalar(multiByte, 2); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 4), result)); + + addMultiByteCharRightPadded1_3(scratch); + multiByte = Arrays.copyOf(scratch, 9); + Assert.assertTrue(StringExpr.characterCount(multiByte, 0, 9) == 4); + result = StringExpr.rightTrimAndTruncateScalar(multiByte, 2); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 5), result)); + + addMultiByteCharRightPadded1_2(scratch); + multiByte = Arrays.copyOfRange(scratch, 3, 3 + 2); + Assert.assertTrue(StringExpr.characterCount(multiByte) == 2); + result = StringExpr.rightTrimAndTruncateScalar(multiByte, 1); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(multiByte, 1), result)); + + int sentenceOneLen = addMultiByteCharSentenceOne(scratch, 0); + byte[] sentenceOne = Arrays.copyOf(scratch, sentenceOneLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceOne) == 10); + result = StringExpr.rightTrimAndTruncateScalar(sentenceOne, 8); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOne, 20), result)); + + byte[] sentenceOnePortion = Arrays.copyOf(sentenceOne, sentenceOneLen - 3); + Assert.assertTrue(StringExpr.characterCount(sentenceOnePortion) == 9); + result = StringExpr.rightTrimAndTruncateScalar(sentenceOnePortion, 3); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePortion, 9), result)); + + int sentenceTwoLen = addMultiByteCharSentenceTwo(scratch, 0); + byte[] sentenceTwo = Arrays.copyOf(scratch, 
sentenceTwoLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwo) == 13); + result = StringExpr.rightTrimAndTruncateScalar(sentenceTwo, 9); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwo, 16), result)); + + byte[] sentenceTwoPortion = Arrays.copyOf(sentenceTwo, sentenceTwoLen - 5); + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPortion) == 10); + result = StringExpr.rightTrimAndTruncateScalar(sentenceTwoPortion, 6); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPortion, 11), result)); + + int start; + + // Left pad longer strings with multi-byte characters. + start = addPads(scratch, 0, 3); + int sentenceOnePaddedLeftLen = addMultiByteCharSentenceOne(scratch, start); + byte[] sentenceOnePaddedLeft = Arrays.copyOf(scratch, sentenceOnePaddedLeftLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeft) == 3 + 10); + result = StringExpr.rightTrimAndTruncateScalar(sentenceOnePaddedLeft, 4); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedLeft, 6), result)); + + byte[] sentenceOnePaddedLeftPortion = Arrays.copyOf(sentenceOnePaddedLeft, sentenceOnePaddedLeftLen - 3); + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedLeftPortion) == 3 + 9); + result = StringExpr.rightTrimAndTruncateScalar(sentenceOnePaddedLeftPortion, 7); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedLeftPortion, 12), result)); + + start = addPads(scratch, 0, 2); + int sentenceTwoPaddedLeftLen = addMultiByteCharSentenceTwo(scratch, start); + byte[] sentenceTwoPaddedLeft = Arrays.copyOf(scratch, sentenceTwoPaddedLeftLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeft) == 2 + 13); + result = StringExpr.rightTrimAndTruncateScalar(sentenceTwoPaddedLeft, 14); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedLeft, 24), result)); + + byte[] sentenceTwoPaddedLeftPortion = Arrays.copyOf(sentenceTwoPaddedLeft, sentenceTwoPaddedLeftLen - 5); + 
Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedLeftPortion) == 2 + 10); + result = StringExpr.rightTrimAndTruncateScalar(sentenceTwoPaddedLeftPortion, 9); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedLeftPortion, 15), result)); + + // Right pad longer strings with multi-byte characters. + start = addMultiByteCharSentenceOne(scratch, 0); + int sentenceOnePaddedRightLen = addPads(scratch, start, 4); + byte[] sentenceOnePaddedRight = Arrays.copyOf(scratch, sentenceOnePaddedRightLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRight) == 10 + 4); + result = StringExpr.rightTrimAndTruncateScalar(sentenceOnePaddedRight, 1); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedRight, 3), result)); + + byte[] sentenceOnePaddedRightPortion = Arrays.copyOf(sentenceOnePaddedRight, sentenceOnePaddedRightLen - 3 - 4); + Assert.assertTrue(StringExpr.characterCount(sentenceOnePaddedRightPortion) == 9); + result = StringExpr.rightTrimAndTruncateScalar(sentenceOnePaddedRightPortion, 5); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceOnePaddedRightPortion, 13), result)); + + start = addMultiByteCharSentenceTwo(scratch, 0); + int sentenceTwoPaddedRightLen = addPads(scratch, start, 1); + byte[] sentenceTwoPaddedRight = Arrays.copyOf(scratch, sentenceTwoPaddedRightLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRight) == 13 + 1); + result = StringExpr.rightTrimAndTruncateScalar(sentenceTwoPaddedRight, 6); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedRight, 11), result)); + + byte[] sentenceTwoPaddedRightPortion = Arrays.copyOf(sentenceTwoPaddedRight, sentenceTwoPaddedRightLen - 5 - 1); + Assert.assertTrue(StringExpr.characterCount(sentenceTwoPaddedRightPortion) == 10); + result = StringExpr.rightTrimAndTruncateScalar(sentenceTwoPaddedRightPortion, 8); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceTwoPaddedRightPortion, 13), result)); + + // Multi-byte 
characters with blank ranges. + int sentenceBlankRangesLen = addMultiByteCharSentenceBlankRanges(scratch, 0); + byte[] sentenceBlankRanges = Arrays.copyOf(scratch, sentenceBlankRangesLen); + + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRanges) == 17); + result = StringExpr.rightTrimAndTruncateScalar(sentenceBlankRanges, 4); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceBlankRanges, 7), result)); + + byte[] sentenceBlankRangesPortion = Arrays.copyOf(sentenceBlankRanges, sentenceBlankRangesLen - 3); + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRangesPortion) == 16); + result = StringExpr.rightTrimAndTruncateScalar(sentenceBlankRangesPortion, 14); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceBlankRangesPortion, 19), result)); + + sentenceBlankRangesPortion = Arrays.copyOfRange(sentenceBlankRanges, 7, 7 + 17); + Assert.assertTrue(StringExpr.characterCount(sentenceBlankRangesPortion) == 13); + result = StringExpr.rightTrimAndTruncateScalar(sentenceBlankRangesPortion, 11); + Assert.assertTrue(Arrays.equals(Arrays.copyOf(sentenceBlankRangesPortion, 12), result)); + Assert.assertTrue(StringExpr.characterCount(result) == 8); + } @Test // Load a BytesColumnVector by copying in large data, enough to force // the buffer to expand. 
@@ -157,7 +3209,37 @@ public void testLoadBytesColumnVectorByRef() { public void testStringColCompareStringScalarFilter() { VectorizedRowBatch batch = makeStringBatch(); VectorExpression expr; - expr = new FilterStringColEqualStringScalar(0, red2); + expr = new FilterStringGroupColEqualStringScalar(0, red2); + expr.evaluate(batch); + + // only red qualifies, and it's in entry 0 + Assert.assertTrue(batch.size == 1); + Assert.assertTrue(batch.selected[0] == 0); + + batch = makeStringBatch(); + expr = new FilterStringGroupColLessStringScalar(0, red2); + expr.evaluate(batch); + + // only green qualifies, and it's in entry 1 + Assert.assertTrue(batch.size == 1); + Assert.assertTrue(batch.selected[0] == 1); + + batch = makeStringBatch(); + expr = new FilterStringGroupColGreaterEqualStringScalar(0, green); + expr.evaluate(batch); + + // green and red qualify + Assert.assertTrue(batch.size == 2); + Assert.assertTrue(batch.selected[0] == 0); + Assert.assertTrue(batch.selected[1] == 1); + } + + @Test + // Test string column to CHAR literal comparison + public void testStringColCompareCharScalarFilter() { + VectorizedRowBatch batch = makeStringBatch(); + VectorExpression expr; + expr = new FilterStringGroupColEqualCharScalar(0, new HiveChar(new String(red2), 10)); expr.evaluate(batch); // only red qualifies, and it's in entry 0 @@ -165,7 +3247,7 @@ public void testStringColCompareStringScalarFilter() { Assert.assertTrue(batch.selected[0] == 0); batch = makeStringBatch(); - expr = new FilterStringColLessStringScalar(0, red2); + expr = new FilterStringGroupColLessCharScalar(0, new HiveChar(new String(red2), 8)); expr.evaluate(batch); // only green qualifies, and it's in entry 1 @@ -173,7 +3255,37 @@ public void testStringColCompareStringScalarFilter() { Assert.assertTrue(batch.selected[0] == 1); batch = makeStringBatch(); - expr = new FilterStringColGreaterEqualStringScalar(0, green); + expr = new FilterStringGroupColGreaterEqualCharScalar(0, new HiveChar(new String(green), 
12)); + expr.evaluate(batch); + + // green and red qualify + Assert.assertTrue(batch.size == 2); + Assert.assertTrue(batch.selected[0] == 0); + Assert.assertTrue(batch.selected[1] == 1); + } + + @Test + // Test string column to VARCHAR literal comparison + public void testStringColCompareVarCharScalarFilter() { + VectorizedRowBatch batch = makeStringBatch(); + VectorExpression expr; + expr = new FilterStringGroupColEqualVarCharScalar(0, new HiveVarchar(new String(red2), 10)); + expr.evaluate(batch); + + // only red qualifies, and it's in entry 0 + Assert.assertTrue(batch.size == 1); + Assert.assertTrue(batch.selected[0] == 0); + + batch = makeStringBatch(); + expr = new FilterStringGroupColLessVarCharScalar(0, new HiveVarchar(new String(red2), 8)); + expr.evaluate(batch); + + // only green qualifies, and it's in entry 1 + Assert.assertTrue(batch.size == 1); + Assert.assertTrue(batch.selected[0] == 1); + + batch = makeStringBatch(); + expr = new FilterStringGroupColGreaterEqualVarCharScalar(0, new HiveVarchar(new String(green), 12)); expr.evaluate(batch); // green and red qualify @@ -187,7 +3299,53 @@ public void testStringColCompareStringScalarProjection() { VectorizedRowBatch batch = makeStringBatch(); VectorExpression expr; - expr = new StringColEqualStringScalar(0, red2, 2); + expr = new StringGroupColEqualStringScalar(0, red2, 2); + expr.evaluate(batch); + Assert.assertEquals(3, batch.size); + LongColumnVector outVector = (LongColumnVector) batch.cols[2]; + Assert.assertEquals(1, outVector.vector[0]); + Assert.assertEquals(0, outVector.vector[1]); + Assert.assertEquals(0, outVector.vector[2]); + + batch = makeStringBatch(); + expr = new StringGroupColEqualStringScalar(0, green, 2); + expr.evaluate(batch); + Assert.assertEquals(3, batch.size); + outVector = (LongColumnVector) batch.cols[2]; + Assert.assertEquals(0, outVector.vector[0]); + Assert.assertEquals(1, outVector.vector[1]); + Assert.assertEquals(0, outVector.vector[2]); + } + + @Test + public void 
testStringColCompareCharScalarProjection() { + VectorizedRowBatch batch = makeStringBatch(); + VectorExpression expr; + + expr = new StringGroupColEqualCharScalar(0, new HiveChar(new String(red2), 8), 2); + expr.evaluate(batch); + Assert.assertEquals(3, batch.size); + LongColumnVector outVector = (LongColumnVector) batch.cols[2]; + Assert.assertEquals(1, outVector.vector[0]); + Assert.assertEquals(0, outVector.vector[1]); + Assert.assertEquals(0, outVector.vector[2]); + + batch = makeStringBatch(); + expr = new StringGroupColEqualCharScalar(0, new HiveChar(new String(green), 10), 2); + expr.evaluate(batch); + Assert.assertEquals(3, batch.size); + outVector = (LongColumnVector) batch.cols[2]; + Assert.assertEquals(0, outVector.vector[0]); + Assert.assertEquals(1, outVector.vector[1]); + Assert.assertEquals(0, outVector.vector[2]); + } + + @Test + public void testStringColCompareVarCharScalarProjection() { + VectorizedRowBatch batch = makeStringBatch(); + VectorExpression expr; + + expr = new StringGroupColEqualVarCharScalar(0, new HiveVarchar(new String(red2), 8), 2); expr.evaluate(batch); Assert.assertEquals(3, batch.size); LongColumnVector outVector = (LongColumnVector) batch.cols[2]; @@ -196,7 +3354,7 @@ public void testStringColCompareStringScalarProjection() { Assert.assertEquals(0, outVector.vector[2]); batch = makeStringBatch(); - expr = new StringColEqualStringScalar(0, green, 2); + expr = new StringGroupColEqualVarCharScalar(0, new HiveVarchar(new String(green), 10), 2); expr.evaluate(batch); Assert.assertEquals(3, batch.size); outVector = (LongColumnVector) batch.cols[2]; @@ -210,7 +3368,7 @@ public void testStringColCompareStringScalarProjection() { public void testStringScalarCompareStringCol() { VectorizedRowBatch batch = makeStringBatch(); VectorExpression expr; - expr = new FilterStringScalarEqualStringColumn(red2, 0); + expr = new FilterStringScalarEqualStringGroupColumn(red2, 0); expr.evaluate(batch); // only red qualifies, and it's in entry 0 @@ 
-218,7 +3376,7 @@ public void testStringScalarCompareStringCol() { Assert.assertTrue(batch.selected[0] == 0); batch = makeStringBatch(); - expr = new FilterStringScalarGreaterStringColumn(red2, 0); + expr = new FilterStringScalarGreaterStringGroupColumn(red2, 0); expr.evaluate(batch); // only green qualifies, and it's in entry 1 @@ -226,21 +3384,127 @@ public void testStringScalarCompareStringCol() { Assert.assertTrue(batch.selected[0] == 1); batch = makeStringBatch(); - expr = new FilterStringScalarLessEqualStringColumn(green, 0); + expr = new FilterStringScalarLessEqualStringGroupColumn(green, 0); + expr.evaluate(batch); + + // green and red qualify + Assert.assertTrue(batch.size == 2); + Assert.assertTrue(batch.selected[0] == 0); + Assert.assertTrue(batch.selected[1] == 1); + } + + @Test + // Test CHAR literal to string column comparison + public void testCharScalarCompareStringCol() { + VectorizedRowBatch batch = makeStringBatch(); + VectorExpression expr; + expr = new FilterCharScalarEqualStringGroupColumn(new HiveChar(new String(red2), 8), 0); + expr.evaluate(batch); + + // only red qualifies, and it's in entry 0 + Assert.assertTrue(batch.size == 1); + Assert.assertTrue(batch.selected[0] == 0); + + batch = makeStringBatch(); + expr = new FilterCharScalarGreaterStringGroupColumn(new HiveChar(new String(red2), 8), 0); + expr.evaluate(batch); + + // only green qualifies, and it's in entry 1 + Assert.assertTrue(batch.size == 1); + Assert.assertTrue(batch.selected[0] == 1); + + batch = makeStringBatch(); + expr = new FilterCharScalarLessEqualStringGroupColumn(new HiveChar(new String(green), 10), 0); + expr.evaluate(batch); + + // green and red qualify + Assert.assertTrue(batch.size == 2); + Assert.assertTrue(batch.selected[0] == 0); + Assert.assertTrue(batch.selected[1] == 1); + } + + @Test + // Test VARCHAR literal to string column comparison + public void testVarCharScalarCompareStringCol() { + VectorizedRowBatch batch = makeStringBatch(); + VectorExpression 
expr; + expr = new FilterVarCharScalarEqualStringGroupColumn(new HiveVarchar(new String(red2), 8), 0); + expr.evaluate(batch); + + // only red qualifies, and it's in entry 0 + Assert.assertTrue(batch.size == 1); + Assert.assertTrue(batch.selected[0] == 0); + + batch = makeStringBatch(); + expr = new FilterVarCharScalarGreaterStringGroupColumn(new HiveVarchar(new String(red2), 8), 0); + expr.evaluate(batch); + + // only green qualifies, and it's in entry 1 + Assert.assertTrue(batch.size == 1); + Assert.assertTrue(batch.selected[0] == 1); + + batch = makeStringBatch(); + expr = new FilterVarCharScalarLessEqualStringGroupColumn(new HiveVarchar(new String(green), 10), 0); + expr.evaluate(batch); + + // green and red qualify + Assert.assertTrue(batch.size == 2); + Assert.assertTrue(batch.selected[0] == 0); + Assert.assertTrue(batch.selected[1] == 1); + } + + @Test + public void testStringScalarCompareStringColProjection() { + VectorizedRowBatch batch = makeStringBatch(); + VectorExpression expr; + + expr = new StringScalarEqualStringGroupColumn(red2, 0, 2); + expr.evaluate(batch); + Assert.assertEquals(3, batch.size); + LongColumnVector outVector = (LongColumnVector) batch.cols[2]; + Assert.assertEquals(1, outVector.vector[0]); + Assert.assertEquals(0, outVector.vector[1]); + Assert.assertEquals(0, outVector.vector[2]); + + batch = makeStringBatch(); + expr = new StringScalarEqualStringGroupColumn(green, 0, 2); + expr.evaluate(batch); + Assert.assertEquals(3, batch.size); + outVector = (LongColumnVector) batch.cols[2]; + Assert.assertEquals(0, outVector.vector[0]); + Assert.assertEquals(1, outVector.vector[1]); + Assert.assertEquals(0, outVector.vector[2]); + } + + @Test + public void testCharScalarCompareStringColProjection() { + VectorizedRowBatch batch = makeStringBatch(); + VectorExpression expr; + + expr = new CharScalarEqualStringGroupColumn(new HiveChar(new String(red2), 8), 0, 2); + expr.evaluate(batch); + Assert.assertEquals(3, batch.size); + LongColumnVector 
outVector = (LongColumnVector) batch.cols[2]; + Assert.assertEquals(1, outVector.vector[0]); + Assert.assertEquals(0, outVector.vector[1]); + Assert.assertEquals(0, outVector.vector[2]); + + batch = makeStringBatch(); + expr = new CharScalarEqualStringGroupColumn(new HiveChar(new String(green), 10), 0, 2); expr.evaluate(batch); - - // green and red qualify - Assert.assertTrue(batch.size == 2); - Assert.assertTrue(batch.selected[0] == 0); - Assert.assertTrue(batch.selected[1] == 1); + Assert.assertEquals(3, batch.size); + outVector = (LongColumnVector) batch.cols[2]; + Assert.assertEquals(0, outVector.vector[0]); + Assert.assertEquals(1, outVector.vector[1]); + Assert.assertEquals(0, outVector.vector[2]); } @Test - public void testStringScalarCompareStringColProjection() { + public void testVarCharScalarCompareStringColProjection() { VectorizedRowBatch batch = makeStringBatch(); VectorExpression expr; - expr = new StringScalarEqualStringColumn(red2, 0, 2); + expr = new VarCharScalarEqualStringGroupColumn(new HiveVarchar(new String(red2), 8), 0, 2); expr.evaluate(batch); Assert.assertEquals(3, batch.size); LongColumnVector outVector = (LongColumnVector) batch.cols[2]; @@ -249,7 +3513,7 @@ public void testStringScalarCompareStringColProjection() { Assert.assertEquals(0, outVector.vector[2]); batch = makeStringBatch(); - expr = new StringScalarEqualStringColumn(green, 0, 2); + expr = new VarCharScalarEqualStringGroupColumn(new HiveVarchar(new String(green), 10), 0, 2); expr.evaluate(batch); Assert.assertEquals(3, batch.size); outVector = (LongColumnVector) batch.cols[2]; @@ -257,7 +3521,6 @@ public void testStringScalarCompareStringColProjection() { Assert.assertEquals(1, outVector.vector[1]); Assert.assertEquals(0, outVector.vector[2]); } - @Test public void testStringColCompareStringColFilter() { VectorizedRowBatch batch; @@ -275,7 +3538,7 @@ public void testStringColCompareStringColFilter() { // nulls possible on left, right batch = 
makeStringBatchForColColCompare(); - expr = new FilterStringColLessStringColumn(0,1); + expr = new FilterStringGroupColLessStringGroupColumn(0,1); expr.evaluate(batch); Assert.assertEquals(1, batch.size); Assert.assertEquals(0, batch.selected[0]); @@ -428,7 +3691,7 @@ public void testStringColCompareStringColProjection() { // nulls possible on left, right batch = makeStringBatchForColColCompare(); - expr = new StringColLessStringColumn(0, 1, 3); + expr = new StringGroupColLessStringGroupColumn(0, 1, 3); expr.evaluate(batch); Assert.assertEquals(4, batch.size); outVector = ((LongColumnVector) batch.cols[3]).vector; @@ -1021,11 +4284,137 @@ public void testStringLikeMultiByte() throws HiveException { } @Test - public void testColConcatScalar() { + public void testColConcatStringScalar() { + + // has nulls, not repeating + VectorizedRowBatch batch = makeStringBatch(); + StringGroupColConcatStringScalar expr = new StringGroupColConcatStringScalar(0, red, 1); + expr.evaluate(batch); + BytesColumnVector outCol = (BytesColumnVector) batch.cols[1]; + + int cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, cmp); + Assert.assertTrue(outCol.isNull[2]); + int cmp2 = StringExpr.compare(greenred, 0, greenred.length, outCol.vector[1], + outCol.start[1], outCol.length[1]); + Assert.assertEquals(0, cmp2); + Assert.assertFalse(outCol.noNulls); + Assert.assertFalse(outCol.isRepeating); + + // no nulls, not repeating + batch = makeStringBatch(); + batch.cols[0].noNulls = true; + expr.evaluate(batch); + outCol = (BytesColumnVector) batch.cols[1]; + cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, cmp); + + cmp2 = StringExpr.compare(greenred, 0, greenred.length, outCol.vector[1], + outCol.start[1], outCol.length[1]); + Assert.assertEquals(0, cmp2); + + int cmp3 = StringExpr.compare(red, 0, red.length, outCol.vector[2], + 
outCol.start[2], outCol.length[2]); + Assert.assertEquals(0, cmp3); + + Assert.assertTrue(outCol.noNulls); + Assert.assertFalse(outCol.isRepeating); + + // has nulls, is repeating + batch = makeStringBatch(); + batch.cols[0].isRepeating = true; + expr.evaluate(batch); + outCol = (BytesColumnVector) batch.cols[1]; + cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, cmp); + Assert.assertTrue(outCol.isRepeating); + Assert.assertFalse(outCol.noNulls); + + // no nulls, is repeating + batch = makeStringBatch(); + batch.cols[0].isRepeating = true; + batch.cols[0].noNulls = true; + expr.evaluate(batch); + outCol = (BytesColumnVector) batch.cols[1]; + cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, cmp); + Assert.assertTrue(outCol.isRepeating); + Assert.assertTrue(outCol.noNulls); + } + + @Test + public void testColConcatCharScalar() { + + // has nulls, not repeating + VectorizedRowBatch batch = makeStringBatch(); + StringGroupColConcatCharScalar expr = new StringGroupColConcatCharScalar(0, new HiveChar(new String(red), 6), 1); + expr.evaluate(batch); + BytesColumnVector outCol = (BytesColumnVector) batch.cols[1]; + + int cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, cmp); + Assert.assertTrue(outCol.isNull[2]); + int cmp2 = StringExpr.compare(greenred, 0, greenred.length, outCol.vector[1], + outCol.start[1], outCol.length[1]); + Assert.assertEquals(0, cmp2); + Assert.assertFalse(outCol.noNulls); + Assert.assertFalse(outCol.isRepeating); + + // no nulls, not repeating + batch = makeStringBatch(); + batch.cols[0].noNulls = true; + expr.evaluate(batch); + outCol = (BytesColumnVector) batch.cols[1]; + cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, 
cmp); + + cmp2 = StringExpr.compare(greenred, 0, greenred.length, outCol.vector[1], + outCol.start[1], outCol.length[1]); + Assert.assertEquals(0, cmp2); + + int cmp3 = StringExpr.compare(red, 0, red.length, outCol.vector[2], + outCol.start[2], outCol.length[2]); + Assert.assertEquals(0, cmp3); + + Assert.assertTrue(outCol.noNulls); + Assert.assertFalse(outCol.isRepeating); + + // has nulls, is repeating + batch = makeStringBatch(); + batch.cols[0].isRepeating = true; + expr.evaluate(batch); + outCol = (BytesColumnVector) batch.cols[1]; + cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, cmp); + Assert.assertTrue(outCol.isRepeating); + Assert.assertFalse(outCol.noNulls); + + // no nulls, is repeating + batch = makeStringBatch(); + batch.cols[0].isRepeating = true; + batch.cols[0].noNulls = true; + expr.evaluate(batch); + outCol = (BytesColumnVector) batch.cols[1]; + cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, cmp); + Assert.assertTrue(outCol.isRepeating); + Assert.assertTrue(outCol.noNulls); + } + + @Test + public void testColConcatVarCharScalar() { // has nulls, not repeating VectorizedRowBatch batch = makeStringBatch(); - StringConcatColScalar expr = new StringConcatColScalar(0, red, 1); + StringGroupColConcatVarCharScalar expr = new StringGroupColConcatVarCharScalar(0, new HiveVarchar(new String(red), 14), 1); expr.evaluate(batch); BytesColumnVector outCol = (BytesColumnVector) batch.cols[1]; @@ -1084,11 +4473,137 @@ public void testColConcatScalar() { } @Test - public void testScalarConcatCol() { + public void testStringScalarConcatCol() { + + // has nulls, not repeating + VectorizedRowBatch batch = makeStringBatch(); + StringScalarConcatStringGroupCol expr = new StringScalarConcatStringGroupCol(red, 0, 1); + expr.evaluate(batch); + BytesColumnVector outCol = (BytesColumnVector) batch.cols[1]; 
+ + int cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, cmp); + Assert.assertTrue(outCol.isNull[2]); + int cmp2 = StringExpr.compare(redgreen, 0, redgreen.length, outCol.vector[1], + outCol.start[1], outCol.length[1]); + Assert.assertEquals(0, cmp2); + Assert.assertFalse(outCol.noNulls); + Assert.assertFalse(outCol.isRepeating); + + // no nulls, not repeating + batch = makeStringBatch(); + batch.cols[0].noNulls = true; + expr.evaluate(batch); + outCol = (BytesColumnVector) batch.cols[1]; + cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, cmp); + + cmp2 = StringExpr.compare(redgreen, 0, redgreen.length, outCol.vector[1], + outCol.start[1], outCol.length[1]); + Assert.assertEquals(0, cmp2); + + int cmp3 = StringExpr.compare(red, 0, red.length, outCol.vector[2], + outCol.start[2], outCol.length[2]); + Assert.assertEquals(0, cmp3); + + Assert.assertTrue(outCol.noNulls); + Assert.assertFalse(outCol.isRepeating); + + // has nulls, is repeating + batch = makeStringBatch(); + batch.cols[0].isRepeating = true; + expr.evaluate(batch); + outCol = (BytesColumnVector) batch.cols[1]; + cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, cmp); + Assert.assertTrue(outCol.isRepeating); + Assert.assertFalse(outCol.noNulls); + + // no nulls, is repeating + batch = makeStringBatch(); + batch.cols[0].isRepeating = true; + batch.cols[0].noNulls = true; + expr.evaluate(batch); + outCol = (BytesColumnVector) batch.cols[1]; + cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, cmp); + Assert.assertTrue(outCol.isRepeating); + Assert.assertTrue(outCol.noNulls); + } + + @Test + public void testCharScalarConcatCol() { + + // has nulls, not repeating + VectorizedRowBatch 
batch = makeStringBatch(); + CharScalarConcatStringGroupCol expr = new CharScalarConcatStringGroupCol(new HiveChar(new String(red), 6), 0, 1); + expr.evaluate(batch); + BytesColumnVector outCol = (BytesColumnVector) batch.cols[1]; + + int cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, cmp); + Assert.assertTrue(outCol.isNull[2]); + int cmp2 = StringExpr.compare(redgreen, 0, redgreen.length, outCol.vector[1], + outCol.start[1], outCol.length[1]); + Assert.assertEquals(0, cmp2); + Assert.assertFalse(outCol.noNulls); + Assert.assertFalse(outCol.isRepeating); + + // no nulls, not repeating + batch = makeStringBatch(); + batch.cols[0].noNulls = true; + expr.evaluate(batch); + outCol = (BytesColumnVector) batch.cols[1]; + cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, cmp); + + cmp2 = StringExpr.compare(redgreen, 0, redgreen.length, outCol.vector[1], + outCol.start[1], outCol.length[1]); + Assert.assertEquals(0, cmp2); + + int cmp3 = StringExpr.compare(red, 0, red.length, outCol.vector[2], + outCol.start[2], outCol.length[2]); + Assert.assertEquals(0, cmp3); + + Assert.assertTrue(outCol.noNulls); + Assert.assertFalse(outCol.isRepeating); + + // has nulls, is repeating + batch = makeStringBatch(); + batch.cols[0].isRepeating = true; + expr.evaluate(batch); + outCol = (BytesColumnVector) batch.cols[1]; + cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], outCol.length[0]); + Assert.assertEquals(0, cmp); + Assert.assertTrue(outCol.isRepeating); + Assert.assertFalse(outCol.noNulls); + + // no nulls, is repeating + batch = makeStringBatch(); + batch.cols[0].isRepeating = true; + batch.cols[0].noNulls = true; + expr.evaluate(batch); + outCol = (BytesColumnVector) batch.cols[1]; + cmp = StringExpr.compare(redred, 0, redred.length, outCol.vector[0], + outCol.start[0], 
outCol.length[0]); + Assert.assertEquals(0, cmp); + Assert.assertTrue(outCol.isRepeating); + Assert.assertTrue(outCol.noNulls); + } + + @Test + public void testVarCharScalarConcatCol() { // has nulls, not repeating VectorizedRowBatch batch = makeStringBatch(); - StringConcatScalarCol expr = new StringConcatScalarCol(red, 0, 1); + VarCharScalarConcatStringGroupCol expr = new VarCharScalarConcatStringGroupCol(new HiveVarchar(new String(red), 14), 0, 1); expr.evaluate(batch); BytesColumnVector outCol = (BytesColumnVector) batch.cols[1]; @@ -1151,7 +4666,7 @@ public void testColConcatCol() { // has nulls, not repeating VectorizedRowBatch batch = makeStringBatch2In1Out(); - StringConcatColCol expr = new StringConcatColCol(0, 1, 2); + StringGroupConcatColCol expr = new StringGroupConcatColCol(0, 1, 2); expr.evaluate(batch); BytesColumnVector outCol = (BytesColumnVector) batch.cols[2]; diff --git ql/src/test/queries/clientpositive/vector_char_2.q ql/src/test/queries/clientpositive/vector_char_2.q new file mode 100644 index 0000000..0828ca1 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_char_2.q @@ -0,0 +1,49 @@ +SET hive.vectorized.execution.enabled=true; +drop table char_2; + +create table char_2 ( + key char(10), + value char(20) +) stored as orc; + +insert overwrite table char_2 select * from src; + +select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5; + +explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5; + +-- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5; + +select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5; + +explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5; + +-- should match the query from src 
+select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5; + +drop table char_2; diff --git ql/src/test/queries/clientpositive/vector_char_simple.q ql/src/test/queries/clientpositive/vector_char_simple.q new file mode 100644 index 0000000..ec46630 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_char_simple.q @@ -0,0 +1,43 @@ +SET hive.vectorized.execution.enabled=true; +drop table char_2; + +create table char_2 ( + key char(10), + value char(20) +) stored as orc; + +insert overwrite table char_2 select * from src; + +select key, value +from src +order by key asc +limit 5; + +explain select key, value +from char_2 +order by key asc +limit 5; + +-- should match the query from src +select key, value +from char_2 +order by key asc +limit 5; + +select key, value +from src +order by key desc +limit 5; + +explain select key, value +from char_2 +order by key desc +limit 5; + +-- should match the query from src +select key, value +from char_2 +order by key desc +limit 5; + +drop table char_2; diff --git ql/src/test/queries/clientpositive/vector_varchar_simple.q ql/src/test/queries/clientpositive/vector_varchar_simple.q new file mode 100644 index 0000000..68d6b09 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_varchar_simple.q @@ -0,0 +1,43 @@ +SET hive.vectorized.execution.enabled=true; +drop table char_2; + +create table char_2 ( + key varchar(10), + value varchar(20) +) stored as orc; + +insert overwrite table char_2 select * from src; + +select key, value +from src +order by key asc +limit 5; + +explain select key, value +from char_2 +order by key asc +limit 5; + +-- should match the query from src +select key, value +from char_2 +order by key asc +limit 5; + +select key, value +from src +order by key desc +limit 5; + +explain select key, value +from char_2 +order by key desc +limit 5; + +-- should match the query from src +select key, value +from char_2 +order by key desc +limit 5; + +drop 
table char_2; diff --git ql/src/test/results/clientpositive/tez/vector_char_2.q.out ql/src/test/results/clientpositive/tez/vector_char_2.q.out new file mode 100644 index 0000000..2e66485 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_char_2.q.out @@ -0,0 +1,298 @@ +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_2 ( + key char(10), + value char(20) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_2 +POSTHOOK: query: create table char_2 ( + key char(10), + value char(20) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_2 +PREHOOK: query: insert overwrite table char_2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char_2 +POSTHOOK: query: insert overwrite table char_2 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char_2 +POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_0 0 3 +val_10 10 1 +val_100 200 2 +val_103 206 2 +val_104 208 2 +PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select value, 
sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: char(20)), key (type: char(10)) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(UDFToInteger(key)), count() + keys: value (type: char(20)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: + + Map-reduce partition columns: _col0 (type: char(20)) + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: char(20)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: + + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: 
bigint), _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +val_0 0 3 +val_10 10 1 +val_100 200 2 +val_103 206 2 +val_104 208 2 +PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: explain select value, sum(cast(key 
as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: char(20)), key (type: char(10)) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(UDFToInteger(key)), count() + keys: value (type: char(20)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: + + Map-reduce partition columns: _col0 (type: char(20)) + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: char(20)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
char(20)) + sort order: - + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_2 +PREHOOK: Output: default@char_2 +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_2 +POSTHOOK: Output: default@char_2 diff --git ql/src/test/results/clientpositive/tez/vector_char_simple.q.out ql/src/test/results/clientpositive/tez/vector_char_simple.q.out new file 
mode 100644 index 0000000..bac33ec --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_char_simple.q.out @@ -0,0 +1,236 @@ +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_2 ( + key char(10), + value char(20) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_2 +POSTHOOK: query: create table char_2 ( + key char(10), + value char(20) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_2 +PREHOOK: query: insert overwrite table char_2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char_2 +POSTHOOK: query: insert overwrite table char_2 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char_2 +POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, value +from src +order by key asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from src +order by key asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: explain select key, value +from char_2 +order by key asc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value +from char_2 +order by key asc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + 
alias: char_2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: char(10)), value (type: char(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(10)) + sort order: + + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(20)) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: select key, value +from src +order by key desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from 
src +order by key desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 +98 val_98 +97 val_97 +97 val_97 +96 val_96 +PREHOOK: query: explain select key, value +from char_2 +order by key desc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value +from char_2 +order by key desc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: char(10)), value (type: char(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(10)) + sort order: - + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(20)) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: 
query: -- should match the query from src +select key, value +from char_2 +order by key desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +98 val_98 +98 val_98 +97 val_97 +97 val_97 +96 val_96 +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_2 +PREHOOK: Output: default@char_2 +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_2 +POSTHOOK: Output: default@char_2 diff --git ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out new file mode 100644 index 0000000..f097414 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vector_varchar_simple.q.out @@ -0,0 +1,236 @@ +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_2 ( + key varchar(10), + value varchar(20) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_2 +POSTHOOK: query: create table char_2 ( + key varchar(10), + value varchar(20) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_2 +PREHOOK: query: insert overwrite table char_2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char_2 +POSTHOOK: query: insert overwrite table char_2 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char_2 +POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, 
type:string, comment:default), ] +PREHOOK: query: select key, value +from src +order by key asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from src +order by key asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: explain select key, value +from char_2 +order by key asc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value +from char_2 +order by key asc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: varchar(10)), value (type: varchar(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: varchar(10)) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: varchar(20)) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: select key, value +from src +order by key desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from src +order by key desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 +98 val_98 +97 val_97 +97 val_97 +96 val_96 +PREHOOK: query: explain select key, value +from char_2 +order by key desc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value +from char_2 +order by key desc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: varchar(10)), value (type: varchar(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: varchar(10)) + sort order: - + Statistics: Num rows: 500 Data size: 88000 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col1 (type: varchar(20)) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +98 val_98 +98 val_98 +97 val_97 +97 val_97 +96 val_96 +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_2 +PREHOOK: Output: default@char_2 +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_2 +POSTHOOK: Output: default@char_2 diff --git ql/src/test/results/clientpositive/vector_char_2.q.out ql/src/test/results/clientpositive/vector_char_2.q.out new file mode 100644 index 0000000..7d1512c --- /dev/null +++ ql/src/test/results/clientpositive/vector_char_2.q.out @@ -0,0 +1,304 @@ +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_2 +POSTHOOK: type: 
DROPTABLE +PREHOOK: query: create table char_2 ( + key char(10), + value char(20) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_2 +POSTHOOK: query: create table char_2 ( + key char(10), + value char(20) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_2 +PREHOOK: query: insert overwrite table char_2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char_2 +POSTHOOK: query: insert overwrite table char_2 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char_2 +POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_0 0 3 +val_10 10 1 +val_100 200 2 +val_103 206 2 +val_104 208 2 +PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: value (type: char(20)), key (type: char(10)) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(UDFToInteger(key)), count() + keys: value (type: char(20)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: + + Map-reduce partition columns: _col0 (type: char(20)) + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: char(20)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: + + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +val_0 0 3 +val_10 10 1 +val_100 200 2 +val_103 206 2 +val_104 208 2 +PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value 
+order by value desc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: char(20)), key (type: char(10)) + outputColumnNames: value, key + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(UDFToInteger(key)), count() + keys: value (type: char(20)) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(20)) + sort order: + + Map-reduce partition columns: _col0 (type: char(20)) + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: char(20)) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: char(20)), _col1 (type: bigint), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: 
char(20)) + sort order: - + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(20)), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_2 +PREHOOK: Output: default@char_2 +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_2 +POSTHOOK: Output: default@char_2 diff --git ql/src/test/results/clientpositive/vector_char_simple.q.out ql/src/test/results/clientpositive/vector_char_simple.q.out new file mode 100644 index 0000000..72dc8aa --- /dev/null 
+++ ql/src/test/results/clientpositive/vector_char_simple.q.out @@ -0,0 +1,222 @@ +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_2 ( + key char(10), + value char(20) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_2 +POSTHOOK: query: create table char_2 ( + key char(10), + value char(20) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_2 +PREHOOK: query: insert overwrite table char_2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char_2 +POSTHOOK: query: insert overwrite table char_2 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char_2 +POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, value +from src +order by key asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from src +order by key asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: explain select key, value +from char_2 +order by key asc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value +from char_2 +order by key asc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: 
char(10)), value (type: char(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(10)) + sort order: + + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(20)) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: select key, value +from src +order by key desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from src +order by key desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 +98 val_98 +97 val_97 +97 val_97 +96 val_96 
+PREHOOK: query: explain select key, value +from char_2 +order by key desc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value +from char_2 +order by key desc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: char(10)), value (type: char(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(10)) + sort order: - + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: char(20)) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(10)), VALUE._col0 (type: char(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key desc +limit 5 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +98 val_98 +98 val_98 +97 val_97 +97 val_97 +96 val_96 +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_2 +PREHOOK: Output: default@char_2 +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_2 +POSTHOOK: Output: default@char_2 diff --git ql/src/test/results/clientpositive/vector_varchar_simple.q.out ql/src/test/results/clientpositive/vector_varchar_simple.q.out new file mode 100644 index 0000000..1c77c39 --- /dev/null +++ ql/src/test/results/clientpositive/vector_varchar_simple.q.out @@ -0,0 +1,222 @@ +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_2 ( + key varchar(10), + value varchar(20) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@char_2 +POSTHOOK: query: create table char_2 ( + key varchar(10), + value varchar(20) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@char_2 +PREHOOK: query: insert overwrite table char_2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char_2 +POSTHOOK: query: insert overwrite table char_2 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char_2 +POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, value +from src +order by key asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from src +order by key asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src 
+#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: explain select key, value +from char_2 +order by key asc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value +from char_2 +order by key asc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: varchar(10)), value (type: varchar(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: varchar(10)) + sort order: + + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: varchar(20)) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: 
query: -- should match the query from src +select key, value +from char_2 +order by key asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: select key, value +from src +order by key desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select key, value +from src +order by key desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +98 val_98 +98 val_98 +97 val_97 +97 val_97 +96 val_96 +PREHOOK: query: explain select key, value +from char_2 +order by key desc +limit 5 +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, value +from char_2 +order by key desc +limit 5 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: char_2 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: varchar(10)), value (type: varchar(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: varchar(10)) + sort order: - + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: varchar(20)) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 5 + Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 880 
Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 5 + Processor Tree: + ListSink + +PREHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select key, value +from char_2 +order by key desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +98 val_98 +98 val_98 +97 val_97 +97 val_97 +96 val_96 +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_2 +PREHOOK: Output: default@char_2 +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_2 +POSTHOOK: Output: default@char_2