diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index bf78251..758de06 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -2148,15 +2148,9 @@ private VectorExpression getCastToBoolean(List childExpr) // Long and double are handled using descriptors, string needs to be specially handled. if (isStringFamily(inputType)) { // string casts to false if it is 0 characters long, otherwise true - VectorExpression lenExpr = createVectorExpression(StringLength.class, childExpr, - VectorExpressionDescriptor.Mode.PROJECTION, null); - - int outputCol = ocm.allocateOutputColumn(TypeInfoFactory.longTypeInfo); - VectorExpression lenToBoolExpr = - new CastLongToBooleanViaLongToLong(lenExpr.getOutputColumn(), outputCol); - lenToBoolExpr.setChildExpressions(new VectorExpression[] {lenExpr}); - ocm.freeOutputColumn(lenExpr.getOutputColumn()); - return lenToBoolExpr; + VectorExpression lenExpr = createVectorExpression(CastStringToBoolean.class, childExpr, + VectorExpressionDescriptor.Mode.PROJECTION, TypeInfoFactory.booleanTypeInfo); + return lenExpr; } return null; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToBoolean.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToBoolean.java new file mode 100644 index 0000000..adb9a76 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToBoolean.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; + +import java.nio.charset.StandardCharsets; + +/** + * Type cast string to boolean + */ +public class CastStringToBoolean extends FuncStringToLong { +// private final static byte[] TRUE = StandardCharsets.UTF_8.encode("TRUE").array(); + private final static byte[] FALSE = StandardCharsets.UTF_8.encode("FALSE").array(); + + public CastStringToBoolean() { + super(); + } + + public CastStringToBoolean(int inputColumn, int outputColumn) { + super(inputColumn, outputColumn); + } + + @Override + protected void func(LongColumnVector outV, BytesColumnVector inV, int offset) { + int start = inV.start[offset]; + int length = inV.length[offset]; + byte[] s = inV.vector[offset]; + if (length == FALSE.length) { + for (int i = 0; i < FALSE.length; i++) { + byte a = s[i + start]; + byte b = FALSE[i]; + byte c = 'a' - 'A'; + if ((a != b) && (a != (b + c))) { + outV.vector[offset] = 1; // true + return; + } + } + outV.vector[offset] = 0; // false + return; + } + outV.vector[offset] = length > 0 ? 
1 : 0; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java new file mode 100644 index 0000000..7cfc998 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/FuncStringToLong.java @@ -0,0 +1,139 @@ +package org.apache.hadoop.hive.ql.exec.vector.expressions; + +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; + +/** + * Superclass to support vectorized functions that take a string + * and return a long, optionally with additional configuration arguments. + * Used for cast(string), length(string), etc + */ +public abstract class FuncStringToLong extends VectorExpression { + private static final long serialVersionUID = 1L; + + private int inputCol; + private int outputCol; + + public FuncStringToLong(int inputCol, int outputCol) { + this.inputCol = inputCol; + this.outputCol = outputCol; + } + + public FuncStringToLong() { + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + + if (childExpressions != null) { + super.evaluateChildren(batch); + } + + BytesColumnVector inV = (BytesColumnVector) batch.cols[inputCol]; + int[] sel = batch.selected; + int n = batch.size; + LongColumnVector outV = (LongColumnVector) batch.cols[outputCol]; + + if (n == 0) { + //Nothing to do + return; + } + + if (inV.noNulls) { + outV.noNulls = true; + if (inV.isRepeating) { + outV.isRepeating = true; + func(outV, inV, 0); + } else if (batch.selectedInUse) { + for(int j=0; j != n; j++) { + int i = sel[j]; + func(outV, inV, i); + } + outV.isRepeating = false; + } else { + for(int i = 0; i != n; i++) { + func(outV, inV, i); + } + outV.isRepeating = false; + } + } else { + // Handle case with nulls. 
Don't do function if the value is null, to save time, + // because calling the function can be expensive. + outV.noNulls = false; + if (inV.isRepeating) { + outV.isRepeating = true; + outV.isNull[0] = inV.isNull[0]; + if (!inV.isNull[0]) { + func(outV, inV, 0); + } + } else if (batch.selectedInUse) { + for(int j = 0; j != n; j++) { + int i = sel[j]; + outV.isNull[i] = inV.isNull[i]; + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } else { + System.arraycopy(inV.isNull, 0, outV.isNull, 0, n); + for(int i = 0; i != n; i++) { + if (!inV.isNull[i]) { + func(outV, inV, i); + } + } + outV.isRepeating = false; + } + } + } + + /* Evaluate result for position i (using bytes[] to avoid storage allocation costs) + * and set position i of the output vector to the result. + */ + abstract protected void func(LongColumnVector outV, BytesColumnVector inV, int i); + + @Override + public int getOutputColumn() { + return outputCol; + } + + public int getOutputCol() { + return outputCol; + } + + public void setOutputCol(int outputCol) { + this.outputCol = outputCol; + } + + public int getInputCol() { + return inputCol; + } + + public void setInputCol(int inputCol) { + this.inputCol = inputCol; + } + + @Override + public String getOutputType() { + return "Long"; + } + + @Override + public String vectorExpressionParameters() { + return "col " + inputCol; + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); + b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) + .setNumArguments(1) + .setArgumentTypes( + VectorExpressionDescriptor.ArgumentType.STRING_FAMILY) + .setInputExpressionTypes( + VectorExpressionDescriptor.InputExpressionType.COLUMN); + return b.build(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java 
index cdaf694..ddb42b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/StringLength.java @@ -27,148 +27,31 @@ * Calculate the length of the strings in the input column vector, and store * it in the output column vector. */ -public class StringLength extends VectorExpression { - private static final long serialVersionUID = 1L; - private int colNum; - private int outputColumn; - - public StringLength(int colNum, int outputColumn) { - this(); - this.colNum = colNum; - this.outputColumn = outputColumn; +public class StringLength extends FuncStringToLong { + public StringLength(int colNum, int outputColNum) { + super(colNum, outputColNum); } public StringLength() { super(); } - // Calculate the length of the UTF-8 strings in input vector and place results in output vector. @Override - public void evaluate(VectorizedRowBatch batch) { - - if (childExpressions != null) { - super.evaluateChildren(batch); - } - - BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum]; - LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn]; - int[] sel = batch.selected; - int n = batch.size; - byte[][] vector = inputColVector.vector; - int [] start = inputColVector.start; - int [] length = inputColVector.length; - long[] resultLen = outV.vector; - - if (n == 0) { - //Nothing to do - return; - } - - if (inputColVector.noNulls) { - outV.noNulls = true; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - resultLen[0] = utf8StringLength(vector[0], start[0], length[0]); - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); - } - outV.isRepeating = false; - } else { - for(int i = 0; i != n; i++) { - resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); - } - outV.isRepeating = false; - } - } else { - - /* - * Handle case with nulls. 
Don't do function if the value is null, to save time, - * because calling the function can be expensive. - */ - outV.noNulls = false; - if (inputColVector.isRepeating) { - outV.isRepeating = true; - outV.isNull[0] = inputColVector.isNull[0]; - if (!inputColVector.isNull[0]) { - resultLen[0] = utf8StringLength(vector[0], start[0], length[0]); - } - } else if (batch.selectedInUse) { - for(int j = 0; j != n; j++) { - int i = sel[j]; - if (!inputColVector.isNull[i]) { - resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); - } - outV.isNull[i] = inputColVector.isNull[i]; - } - outV.isRepeating = false; - } else { - for(int i = 0; i != n; i++) { - if (!inputColVector.isNull[i]) { - resultLen[i] = utf8StringLength(vector[i], start[i], length[i]); - } - outV.isNull[i] = inputColVector.isNull[i]; - } - outV.isRepeating = false; - } - } - } - - /* - * Return length in characters of UTF8 string in byte array - * beginning at start that is len bytes long. - */ - static long utf8StringLength(byte[] s, int start, int len) { + protected void func(LongColumnVector outV, BytesColumnVector inV, int i) { long resultLength = 0; - for (int i = start; i < start + len; i++) { + int start = inV.start[i]; + int len = inV.length[i]; + byte[] s = inV.vector[i]; + for (int j = start; j < start + len; j++) { /* Byte bit patterns of the form 10xxxxxx are continuation * bytes. All other bit patterns are the first byte of * a character. 
*/ - if ((s[i] & 0xc0) != 0x80) { + if ((s[j] & 0xc0) != 0x80) { resultLength++; } } - return resultLength; - } - - @Override - public int getOutputColumn() { - return outputColumn; - } - - @Override - public String getOutputType() { - return "Long"; - } - - public int getColNum() { - return colNum; - } - - public void setColNum(int colNum) { - this.colNum = colNum; - } - - public void setOutputColumn(int outputColumn) { - this.outputColumn = outputColumn; - } - - public String vectorExpressionParameters() { - return "col " + colNum; - } - - @Override - public VectorExpressionDescriptor.Descriptor getDescriptor() { - VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder(); - b.setMode(VectorExpressionDescriptor.Mode.PROJECTION) - .setNumArguments(1) - .setArgumentTypes( - VectorExpressionDescriptor.ArgumentType.STRING_FAMILY) - .setInputExpressionTypes( - VectorExpressionDescriptor.InputExpressionType.COLUMN); - return b.build(); + outV.vector[i] = resultLength; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java index 0cc0c9e..1195d45 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.CastDecimalToBoolean; +import org.apache.hadoop.hive.ql.exec.vector.expressions.CastStringToBoolean; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDoubleToBooleanViaDoubleToLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastLongToBooleanViaLongToLong; import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.CastDateToBooleanViaLongToLong; @@ -170,10 +171,17 @@ public BooleanWritable evaluate(DoubleWritable i) { public BooleanWritable evaluate(Text i) { if (i == null) { 
return null; - } else { - booleanWritable.set(i.getLength() != 0); + } + if (i.toString().equalsIgnoreCase("true")) { + booleanWritable.set(true); + return booleanWritable; + } + if (i.toString().equalsIgnoreCase("false")) { + booleanWritable.set(false); return booleanWritable; } + booleanWritable.set(i.getLength() != 0); + return booleanWritable; } public BooleanWritable evaluate(DateWritable d) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java index 31add6e..64f16ee 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorMathFunctions.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.exec.vector.expressions; import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.sql.Timestamp; import java.util.Arrays; import java.util.Random; @@ -231,6 +232,29 @@ public static VectorizedRowBatch getVectorizedRowBatchLongInLongOut() { return batch; } + public static VectorizedRowBatch getVectorizedRowBatchStringInLongOut() { + VectorizedRowBatch batch = new VectorizedRowBatch(2); + BytesColumnVector inV; + LongColumnVector outV; + inV = new BytesColumnVector(); + outV = new LongColumnVector(); + inV.initBuffer(); + inV.setVal(0, StandardCharsets.UTF_8.encode("true").array()); + inV.setVal(1, StandardCharsets.UTF_8.encode("TRUE").array()); + inV.setVal(2, StandardCharsets.UTF_8.encode("TrUe").array()); + inV.setVal(3, StandardCharsets.UTF_8.encode("false").array()); + inV.setVal(4, StandardCharsets.UTF_8.encode("FALSE").array()); + inV.setVal(5, StandardCharsets.UTF_8.encode("FaLsE").array()); + inV.setVal(6, StandardCharsets.UTF_8.encode("").array()); + inV.setVal(7, StandardCharsets.UTF_8.encode("Other").array()); + + batch.cols[0] = inV; + batch.cols[1] = outV; + + 
batch.size = 8; + return batch; + } + public static VectorizedRowBatch getVectorizedRowBatchTimestampInLongOut(long[] longValues) { Random r = new Random(345); VectorizedRowBatch batch = new VectorizedRowBatch(2); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java index c7d9fae..a7e21b5 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/expressions/TestVectorTypeCasts.java @@ -109,6 +109,23 @@ public void testCastLongToBoolean() { } @Test + public void testCastStringToBoolean() { + VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchStringInLongOut(); + LongColumnVector resultV = (LongColumnVector) b.cols[1]; + b.cols[0].noNulls = true; + VectorExpression expr = new CastStringToBoolean(0, 1); + expr.evaluate(b); + Assert.assertEquals(1, resultV.vector[0]); // true + Assert.assertEquals(1, resultV.vector[1]); // true + Assert.assertEquals(1, resultV.vector[2]); // true + Assert.assertEquals(0, resultV.vector[3]); // false + Assert.assertEquals(0, resultV.vector[4]); // false + Assert.assertEquals(0, resultV.vector[5]); // false + Assert.assertEquals(0, resultV.vector[6]); // false + Assert.assertEquals(1, resultV.vector[7]); // true + } + + @Test public void testCastLongToTimestamp() { long[] longValues = new long[500]; VectorizedRowBatch b = TestVectorMathFunctions.getVectorizedRowBatchLongInTimestampOut(longValues); diff --git ql/src/test/queries/clientpositive/udf_to_boolean.q ql/src/test/queries/clientpositive/udf_to_boolean.q index 8bea7ab..1a50d05 100644 --- ql/src/test/queries/clientpositive/udf_to_boolean.q +++ ql/src/test/queries/clientpositive/udf_to_boolean.q @@ -12,6 +12,9 @@ SELECT CAST(CAST(-8.0 AS DOUBLE) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST(-99.0 AS DECIMAL) AS BOOLEAN) FROM src 
tablesample (1 rows); SELECT CAST(CAST('Foo' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); +SELECT CAST(CAST('TRUE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); +SELECT CAST(CAST('true' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); +SELECT CAST(CAST('TrUe' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST('2011-05-06 07:08:09' as timestamp) AS BOOLEAN) FROM src tablesample (1 rows); @@ -27,6 +30,9 @@ SELECT CAST(CAST(0.0 AS DOUBLE) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST(0.0 AS DECIMAL) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST('' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); +SELECT CAST(CAST('FALSE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); +SELECT CAST(CAST('false' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); +SELECT CAST(CAST('FaLsE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows); SELECT CAST(CAST(0 as timestamp) AS BOOLEAN) FROM src tablesample (1 rows); diff --git ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q new file mode 100644 index 0000000..3b31c93 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_udf_string_to_boolean.q @@ -0,0 +1,19 @@ +set hive.mapred.mode=nonstrict; +SET hive.vectorized.execution.enabled = true; +SET hive.int.timestamp.conversion.in.seconds=false; +set hive.fetch.task.conversion=none; + +create table t (s string) stored as orc; + +insert into t values ('false'); +insert into t values ('FALSE'); +insert into t values ('FaLsE'); +insert into t values ('true'); +insert into t values ('TRUE'); +insert into t values ('TrUe'); +insert into t values (''); +insert into t values ('Other'); + +explain SELECT CAST(s AS BOOLEAN) FROM t; + +SELECT CAST(s AS BOOLEAN) FROM t; diff --git ql/src/test/results/clientpositive/udf_to_boolean.q.out ql/src/test/results/clientpositive/udf_to_boolean.q.out index ebce364..bee030a 100644 --- 
ql/src/test/results/clientpositive/udf_to_boolean.q.out +++ ql/src/test/results/clientpositive/udf_to_boolean.q.out @@ -70,6 +70,33 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### true +PREHOOK: query: SELECT CAST(CAST('TRUE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(CAST('TRUE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true +PREHOOK: query: SELECT CAST(CAST('true' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(CAST('true' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true +PREHOOK: query: SELECT CAST(CAST('TrUe' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(CAST('TrUe' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true PREHOOK: query: SELECT CAST(CAST('2011-05-06 07:08:09' as timestamp) AS BOOLEAN) FROM src tablesample (1 rows) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -151,6 +178,33 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### false +PREHOOK: query: SELECT CAST(CAST('FALSE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(CAST('FALSE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +false 
+PREHOOK: query: SELECT CAST(CAST('false' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(CAST('false' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +false +PREHOOK: query: SELECT CAST(CAST('FaLsE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(CAST('FaLsE' AS STRING) AS BOOLEAN) FROM src tablesample (1 rows) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +false PREHOOK: query: SELECT CAST(CAST(0 as timestamp) AS BOOLEAN) FROM src tablesample (1 rows) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git ql/src/test/results/clientpositive/vector_empty_where.q.out ql/src/test/results/clientpositive/vector_empty_where.q.out index 97a0f6a..3f63062 100644 --- ql/src/test/results/clientpositive/vector_empty_where.q.out +++ ql/src/test/results/clientpositive/vector_empty_where.q.out @@ -26,7 +26,7 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsTrue(col 13)(children: CastLongToBooleanViaLongToLong(col 12)(children: StringLength(col 6) -> 12:Long) -> 13:long) -> boolean + predicateExpression: SelectColumnIsTrue(col 12)(children: CastStringToBoolean(col 6) -> 12:Long) -> boolean predicate: cstring1 (type: string) Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE Select Operator diff --git ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out new file mode 100644 index 0000000..161eb1d --- /dev/null +++ ql/src/test/results/clientpositive/vector_udf_string_to_boolean.q.out @@ -0,0 +1,114 @@ 
+PREHOOK: query: create table t (s string) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t (s string) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into t values ('false') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('false') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('FALSE') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('FALSE') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('FaLsE') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('FaLsE') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('true') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('true') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('TRUE') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('TRUE') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert 
into t values ('TrUe') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('TrUe') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__7)values__tmp__table__7.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into t values ('Other') +PREHOOK: type: QUERY +PREHOOK: Output: default@t +POSTHOOK: query: insert into t values ('Other') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.s SIMPLE [(values__tmp__table__8)values__tmp__table__8.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: explain SELECT CAST(s AS BOOLEAN) FROM t +PREHOOK: type: QUERY +POSTHOOK: query: explain SELECT CAST(s AS BOOLEAN) FROM t +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: t + Statistics: Num rows: 8 Data size: 704 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToBoolean(s) (type: boolean) + outputColumnNames: _col0 + Statistics: Num rows: 8 Data size: 704 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 8 Data size: 704 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + 
ListSink + +PREHOOK: query: SELECT CAST(s AS BOOLEAN) FROM t +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT CAST(s AS BOOLEAN) FROM t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +false +false +false +true +true +true +false +true