diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index aaf2399..1783707 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -67,7 +67,7 @@ import org.apache.hadoop.hive.ql.udf.UDFHex; import org.apache.hadoop.hive.ql.udf.UDFHour; import org.apache.hadoop.hive.ql.udf.UDFJson; -import org.apache.hadoop.hive.ql.udf.UDFLength; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLength; import org.apache.hadoop.hive.ql.udf.UDFLike; import org.apache.hadoop.hive.ql.udf.UDFLn; import org.apache.hadoop.hive.ql.udf.UDFLog; @@ -262,7 +262,10 @@ system.registerGenericUDF("trim", GenericUDFTrim.class); system.registerGenericUDF("ltrim", GenericUDFLTrim.class); system.registerGenericUDF("rtrim", GenericUDFRTrim.class); - system.registerUDF("length", UDFLength.class, false); + system.registerGenericUDF("length", GenericUDFLength.class); + system.registerGenericUDF("character_length", GenericUDFCharacterLength.class); + system.registerGenericUDF("char_length", GenericUDFCharacterLength.class); + system.registerGenericUDF("octet_length", GenericUDFOctetLength.class); system.registerUDF("reverse", UDFReverse.class, false); system.registerGenericUDF("field", GenericUDFField.class); system.registerUDF("find_in_set", UDFFindInSet.class, false); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java new file mode 100644 index 0000000..3d85772 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/OctetLength.java @@ -0,0 +1,149 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.expressions;
+
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+
+/** Vectorized expression that writes the byte length of each value of a bytes column. */
+public class OctetLength extends VectorExpression {
+  private static final long serialVersionUID = 1L;
+  // Not transient: the column indices must survive serialization of the expression.
+  private int colNum;
+  private int outputColumn;
+
+  public OctetLength(int colNum, int outputColumn) {
+    this();
+    this.colNum = colNum;
+    this.outputColumn = outputColumn;
+  }
+
+  public OctetLength() {
+    super();
+  }
+
+  // Copy each input value's length (BytesColumnVector.length) into the output vector.
+  @Override
+  public void evaluate(VectorizedRowBatch batch) {
+    if (childExpressions != null) {
+      super.evaluateChildren(batch);
+    }
+
+    BytesColumnVector inputColVector = (BytesColumnVector) batch.cols[colNum];
+    LongColumnVector outV = (LongColumnVector) batch.cols[outputColumn];
+    int[] sel = batch.selected;
+    int n = batch.size;
+    int [] length = inputColVector.length;
+    long[] resultLen = outV.vector;
+
+    if (n == 0) {
+      //Nothing to do
+      return;
+    }
+
+    if (inputColVector.noNulls) {
+      outV.noNulls = true;
+      if (inputColVector.isRepeating) {
+        outV.isRepeating = true;
+        resultLen[0] = length[0];
+      } else if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          resultLen[i] = length[i];
+        }
+        outV.isRepeating = false;
+      } else {
+        for(int i = 0; i != n; i++) {
+          resultLen[i] = length[i];
+        }
+        outV.isRepeating = false;
+      }
+    } else {
+
+      /*
+       * Input has nulls: copy isNull through and only fill lengths for non-null entries.
+       */
+      outV.noNulls = false;
+      if (inputColVector.isRepeating) {
+        outV.isRepeating = true;
+        outV.isNull[0] = inputColVector.isNull[0];
+        if (!inputColVector.isNull[0]) {
+          resultLen[0] = length[0];
+        }
+      } else if (batch.selectedInUse) {
+        for(int j = 0; j != n; j++) {
+          int i = sel[j];
+          if (!inputColVector.isNull[i]) {
+            resultLen[i] = length[i];
+          }
+          outV.isNull[i] = inputColVector.isNull[i];
+        }
+        outV.isRepeating = false;
+      } else {
+        for(int i = 0; i != n; i++) {
+          if (!inputColVector.isNull[i]) {
+            resultLen[i] = length[i];
+          }
+          outV.isNull[i] = inputColVector.isNull[i];
+        }
+        outV.isRepeating = false;
+      }
+    }
+  }
+
+  @Override
+  public int getOutputColumn() {
+    return outputColumn;
+  }
+
+  @Override
+  public String getOutputType() {
+    return "Long";
+  }
+
+  public int getColNum() {
+    return colNum;
+  }
+
+  public void setColNum(int colNum) {
+    this.colNum = colNum;
+  }
+
+  public void setOutputColumn(int outputColumn) {
+    this.outputColumn = outputColumn;
+  }
+
+  public String vectorExpressionParameters() {
+    return "col " + colNum;
+  }
+
+  @Override
+  public VectorExpressionDescriptor.Descriptor getDescriptor() {
+    VectorExpressionDescriptor.Builder b = new VectorExpressionDescriptor.Builder();
+    b.setMode(VectorExpressionDescriptor.Mode.PROJECTION)
+        .setNumArguments(1)
+        .setArgumentTypes(
+            VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
+        .setInputExpressionTypes(
+            VectorExpressionDescriptor.InputExpressionType.COLUMN);
+    return b.build();
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index fadbc20..fb1d325 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -70,10 +70,8 @@
 import org.apache.hadoop.hive.ql.exec.vector.ColumnVector.Type;
 import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping;
 import
org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; -import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; -import org.apache.hadoop.hive.ql.exec.vector.VectorSMBMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.HiveVectorAdaptorUsageMode; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext.InConstantType; @@ -168,7 +166,6 @@ import org.apache.hadoop.hive.ql.udf.UDFFromUnixTime; import org.apache.hadoop.hive.ql.udf.UDFHex; import org.apache.hadoop.hive.ql.udf.UDFHour; -import org.apache.hadoop.hive.ql.udf.UDFLength; import org.apache.hadoop.hive.ql.udf.UDFLike; import org.apache.hadoop.hive.ql.udf.UDFLn; import org.apache.hadoop.hive.ql.udf.UDFLog; @@ -334,7 +331,9 @@ public Vectorizer() { supportedGenericUDFs.add(GenericUDFOPOr.class); supportedGenericUDFs.add(GenericUDFOPAnd.class); supportedGenericUDFs.add(GenericUDFOPEqual.class); - supportedGenericUDFs.add(UDFLength.class); + supportedGenericUDFs.add(GenericUDFLength.class); + supportedGenericUDFs.add(GenericUDFCharacterLength.class); + supportedGenericUDFs.add(GenericUDFOctetLength.class); supportedGenericUDFs.add(UDFYear.class); supportedGenericUDFs.add(UDFMonth.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLength.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLength.java deleted file mode 100644 index 4bdcb0e..0000000 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLength.java +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.udf; - -import org.apache.hadoop.hive.ql.exec.Description; -import org.apache.hadoop.hive.ql.exec.UDF; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; -import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLength; -import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.Text; - -/** - * UDFLength. 
- * - */ -@Description(name = "length", - value = "_FUNC_(str | binary) - Returns the length of str or number of bytes in binary data", - extended = "Example:\n" - + " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + " 8") -@VectorizedExpressions({StringLength.class}) -public class UDFLength extends UDF { - private final IntWritable result = new IntWritable(); - - public IntWritable evaluate(Text s) { - if (s == null) { - return null; - } - - byte[] data = s.getBytes(); - int len = 0; - for (int i = 0; i < s.getLength(); i++) { - if (GenericUDFUtils.isUtfStartByte(data[i])) { - len++; - } - } - - result.set(len); - return result; - } - - public IntWritable evaluate(BytesWritable bw){ - if (bw == null){ - return null; - -} - result.set(bw.getLength()); - return result; - } -} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCharacterLength.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCharacterLength.java new file mode 100644 index 0000000..5b8c68d --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCharacterLength.java @@ -0,0 +1,120 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLength;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
+
+/** CHARACTER_LENGTH / CHAR_LENGTH: number of characters in a string or binary value. */
+@Description(name = "character_length,char_length",
+    value = "_FUNC_(str | binary) - Returns the number of characters in str or binary data",
+    extended = "Example:\n"
+    + " > SELECT _FUNC_('안녕하세요') FROM src LIMIT 1;\n" + " 5")
+@VectorizedExpressions({StringLength.class})
+public class GenericUDFCharacterLength extends GenericUDF {
+  private final IntWritable result = new IntWritable();
+  private transient PrimitiveObjectInspector argumentOI;
+  private transient PrimitiveObjectInspectorConverter.StringConverter stringConverter;
+  private transient boolean isInputString;
+
+  /** Accepts exactly one STRING/CHAR/VARCHAR/BINARY argument; the result type is int. */
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    if (arguments.length != 1) {
+      throw new UDFArgumentLengthException(
+          "CHARACTER_LENGTH requires 1 argument, got " + arguments.length);
+    }
+
+    if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+      throw new UDFArgumentException(
+          "CHARACTER_LENGTH only takes primitive types, got " + arguments[0].getTypeName());
+    }
+    argumentOI = (PrimitiveObjectInspector) arguments[0];
+
+    stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI);
+    PrimitiveObjectInspector.PrimitiveCategory inputType = argumentOI.getPrimitiveCategory();
+    switch (inputType) {
+      case CHAR:
+      case VARCHAR:
+      case STRING:
+        isInputString = true;
+        break;
+
+      case BINARY:
+        isInputString = false;
+        break;
+
+      default:
+        throw new UDFArgumentException(
+            " CHARACTER_LENGTH() only takes STRING/CHAR/VARCHAR/BINARY types as first argument, got "
+            + inputType);
+    }
+
+    return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
+  }
+
+  /** Counts the characters of the argument; a NULL argument yields NULL. */
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    if (isInputString) {
+      String val = null;
+      if (arguments[0] != null) {
+        val = (String) stringConverter.convert(arguments[0].get());
+      }
+      if (val == null) {
+        return null;
+      }
+      // One character per Unicode code point; independent of the platform default charset.
+      result.set(val.codePointCount(0, val.length()));
+      return result;
+    }
+
+    BytesWritable val = null;
+    if (arguments[0] != null) {
+      val = (BytesWritable) arguments[0].get();
+    }
+    if (val == null) {
+      return null;
+    }
+    // Only the first getLength() bytes of the backing array hold valid data.
+    byte[] data = val.getBytes();
+    int len = 0;
+    for (int i = 0; i < val.getLength(); i++) {
+      if (GenericUDFUtils.isUtfStartByte(data[i])) {
+        len++;
+      }
+    }
+    result.set(len);
+    return result;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    return getStandardDisplayString("character_length", children);
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLength.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLength.java
new file mode 100644
index 0000000..c852384
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLength.java
@@ -0,0 +1,122 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.StringLength;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
+
+/**
+ * GenericUDFLength.
+ *
+ * Character count for CHAR/VARCHAR/STRING input, byte count for BINARY input, NULL for NULL.
+ */
+@Description(name = "length",
+    value = "_FUNC_(str | binary) - Returns the length of str or number of bytes in binary data",
+    extended = "Example:\n"
+    + " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + " 8")
+@VectorizedExpressions({StringLength.class})
+public class GenericUDFLength extends GenericUDF {
+  private final IntWritable result = new IntWritable();
+  private transient PrimitiveObjectInspector argumentOI;
+  private transient PrimitiveObjectInspectorConverter.StringConverter stringConverter;
+  private transient boolean isInputString;
+
+  /**
+   * Accepts exactly one STRING/CHAR/VARCHAR/BINARY argument; the result type is int.
+   */
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    if (arguments.length != 1) {
+      throw new UDFArgumentLengthException(
+          "LENGTH requires 1 argument, got " + arguments.length);
+    }
+
+    if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+      // argumentOI is not assigned yet, so take the type name from arguments[0].
+      throw new UDFArgumentException(
+          "LENGTH only takes primitive types, got " + arguments[0].getTypeName());
+    }
+    argumentOI = (PrimitiveObjectInspector) arguments[0];
+
+    stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI);
+    PrimitiveObjectInspector.PrimitiveCategory inputType = argumentOI.getPrimitiveCategory();
+    switch (inputType) {
+      case CHAR:
+      case VARCHAR:
+      case STRING:
+        isInputString = true;
+        break;
+
+      case BINARY:
+        isInputString = false;
+        break;
+
+      default:
+        throw new UDFArgumentException(
+            " LENGTH() only takes STRING/CHAR/VARCHAR/BINARY types as first argument, got "
+            + inputType);
+    }
+
+    return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
+  }
+
+  /**
+   * Computes the length of the argument; a NULL argument yields NULL.
+   */
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    if (isInputString) {
+      String val = null;
+      if (arguments[0] != null) {
+        val = (String) stringConverter.convert(arguments[0].get());
+      }
+      if (val == null) {
+        return null;
+      }
+      // One character per Unicode code point; independent of the platform default charset.
+      result.set(val.codePointCount(0, val.length()));
+      return result;
+    }
+
+    BytesWritable val = null;
+    if (arguments[0] != null) {
+      val = (BytesWritable) arguments[0].get();
+    }
+    if (val == null) {
+      return null;
+    }
+    // Binary input: the length is the number of valid bytes, matching the removed UDFLength.
+    result.set(val.getLength());
+    return result;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    return getStandardDisplayString("length", children);
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOctetLength.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOctetLength.java
new file mode 100644
index 0000000..2216e99
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFOctetLength.java
@@ -0,0 +1,114 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import org.apache.hadoop.hive.ql.exec.vector.expressions.OctetLength;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.IntWritable;
+
+/** OCTET_LENGTH: number of bytes in a string (encoded as UTF-8) or binary value. */
+@Description(name = "octet_length",
+    value = "_FUNC_(str | binary) - Returns the number of bytes in str or binary data",
+    extended = "Example:\n"
+    + " > SELECT _FUNC_('안녕하세요') FROM src LIMIT 1;\n" + " 15")
+@VectorizedExpressions({OctetLength.class})
+public class GenericUDFOctetLength extends GenericUDF {
+  private final IntWritable result = new IntWritable();
+  private transient PrimitiveObjectInspector argumentOI;
+  private transient PrimitiveObjectInspectorConverter.StringConverter stringConverter;
+  private transient boolean isInputString;
+
+  /** Accepts exactly one STRING/CHAR/VARCHAR/BINARY argument; the result type is int. */
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    if (arguments.length != 1) {
+      throw new UDFArgumentLengthException(
+          "OCTET_LENGTH requires 1 argument, got " + arguments.length);
+    }
+
+    if (arguments[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+      // argumentOI is not assigned yet, so take the type name from arguments[0].
+      throw new UDFArgumentException(
+          "OCTET_LENGTH only takes primitive types, got " + arguments[0].getTypeName());
+    }
+    argumentOI = (PrimitiveObjectInspector) arguments[0];
+
+    stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI);
+    PrimitiveObjectInspector.PrimitiveCategory inputType = argumentOI.getPrimitiveCategory();
+    switch (inputType) {
+      case CHAR:
+      case VARCHAR:
+      case STRING:
+        isInputString = true;
+        break;
+
+      case BINARY:
+        isInputString = false;
+        break;
+
+      default:
+        throw new UDFArgumentException(
+            " OCTET_LENGTH() only takes STRING/CHAR/VARCHAR/BINARY types as first argument, got "
+            + inputType);
+    }
+
+    return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
+  }
+
+  /** Counts the bytes of the argument; a NULL argument yields NULL. */
+  @Override
+  public Object evaluate(GenericUDF.DeferredObject[] arguments) throws HiveException {
+    if (isInputString) {
+      String val = null;
+      if (arguments[0] != null) {
+        val = (String) stringConverter.convert(arguments[0].get());
+      }
+      if (val == null) {
+        return null;
+      }
+      // Encode as UTF-8 explicitly; String.getBytes() would use the platform default charset.
+      result.set(val.getBytes(java.nio.charset.StandardCharsets.UTF_8).length);
+      return result;
+    }
+
+    BytesWritable val = null;
+    if (arguments[0] != null) {
+      val = (BytesWritable) arguments[0].get();
+    }
+    if (val == null) {
+      return null;
+    }
+    // getLength() is the number of valid bytes; the backing array may be longer.
+    result.set(val.getLength());
+    return result;
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    return getStandardDisplayString("octet_length", children);
+  }
+}
diff --git ql/src/test/queries/clientpositive/udf_character_length.q ql/src/test/queries/clientpositive/udf_character_length.q
new file mode 100644
index 0000000..44447ba
--- /dev/null
+++ ql/src/test/queries/clientpositive/udf_character_length.q
@@ -0,0 +1,24 @@
+set hive.fetch.task.conversion=more;
+
+DESCRIBE FUNCTION character_length;
+DESCRIBE FUNCTION EXTENDED character_length;
+
+DESCRIBE FUNCTION char_length;
+DESCRIBE FUNCTION EXTENDED char_length;
+
+CREATE TABLE dest1(len INT);
+EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value);
+FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value);
+SELECT dest1.* FROM dest1;
+DROP TABLE
dest1; + +-- Test with non-ascii characters. +CREATE TABLE dest1(name STRING) STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1; +INSERT INTO dest1 VALUES(NULL); + +EXPLAIN SELECT character_length(dest1.name) FROM dest1; +SELECT character_length(dest1.name) FROM dest1; + +EXPLAIN SELECT char_length(dest1.name) FROM dest1; +SELECT char_length(dest1.name) FROM dest1; diff --git ql/src/test/queries/clientpositive/udf_octet_length.q ql/src/test/queries/clientpositive/udf_octet_length.q new file mode 100644 index 0000000..6a8f200 --- /dev/null +++ ql/src/test/queries/clientpositive/udf_octet_length.q @@ -0,0 +1,17 @@ +set hive.fetch.task.conversion=more; + +DESCRIBE FUNCTION octet_length; +DESCRIBE FUNCTION EXTENDED octet_length; + +CREATE TABLE dest1(len INT); +EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value); +FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value); +SELECT dest1.* FROM dest1; +DROP TABLE dest1; + +-- Test with non-ascii characters. 
+CREATE TABLE dest1(name STRING) STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1; +INSERT INTO dest1 VALUES(NULL); +EXPLAIN SELECT octet_length(dest1.name) FROM dest1; +SELECT octet_length(dest1.name) FROM dest1; diff --git ql/src/test/queries/clientpositive/vector_udf_character_length.q ql/src/test/queries/clientpositive/vector_udf_character_length.q new file mode 100644 index 0000000..1219d36 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_udf_character_length.q @@ -0,0 +1,25 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +DESCRIBE FUNCTION character_length; +DESCRIBE FUNCTION EXTENDED character_length; + +DESCRIBE FUNCTION char_length; +DESCRIBE FUNCTION EXTENDED char_length; + +CREATE TABLE dest1(len INT); +EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value); +FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value); +SELECT dest1.* FROM dest1; +DROP TABLE dest1; + +-- Test with non-ascii characters. 
+CREATE TABLE dest1(name STRING) STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1; +INSERT INTO dest1 VALUES(NULL); + +EXPLAIN SELECT character_length(dest1.name) FROM dest1; +SELECT character_length(dest1.name) FROM dest1; + +EXPLAIN SELECT char_length(dest1.name) FROM dest1; +SELECT char_length(dest1.name) FROM dest1; diff --git ql/src/test/queries/clientpositive/vector_udf_octet_length.q ql/src/test/queries/clientpositive/vector_udf_octet_length.q new file mode 100644 index 0000000..cfac289 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_udf_octet_length.q @@ -0,0 +1,18 @@ +SET hive.vectorized.execution.enabled=true; +set hive.fetch.task.conversion=none; + +DESCRIBE FUNCTION octet_length; +DESCRIBE FUNCTION EXTENDED octet_length; + +CREATE TABLE dest1(len INT); +EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value); +FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value); +SELECT dest1.* FROM dest1; +DROP TABLE dest1; + +-- Test with non-ascii characters. 
+CREATE TABLE dest1(name STRING) STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1; +INSERT INTO dest1 VALUES(NULL); +EXPLAIN SELECT octet_length(dest1.name) FROM dest1; +SELECT octet_length(dest1.name) FROM dest1; diff --git ql/src/test/results/clientpositive/show_functions.q.out ql/src/test/results/clientpositive/show_functions.q.out index 3c9bb4a..2c327c7 100644 --- ql/src/test/results/clientpositive/show_functions.q.out +++ ql/src/test/results/clientpositive/show_functions.q.out @@ -43,6 +43,8 @@ case cbrt ceil ceiling +char_length +character_length chr coalesce collect_list @@ -168,6 +170,7 @@ not ntile nullif nvl +octet_length or parse_url parse_url_tuple @@ -269,6 +272,8 @@ case cbrt ceil ceiling +char_length +character_length chr coalesce collect_list diff --git ql/src/test/results/clientpositive/udf_character_length.q.out ql/src/test/results/clientpositive/udf_character_length.q.out new file mode 100644 index 0000000..bb70bb8 --- /dev/null +++ ql/src/test/results/clientpositive/udf_character_length.q.out @@ -0,0 +1,261 @@ +PREHOOK: query: DESCRIBE FUNCTION character_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION character_length +POSTHOOK: type: DESCFUNCTION +character_length(str | binary) - Returns the number of characters in str or binary data +PREHOOK: query: DESCRIBE FUNCTION EXTENDED character_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED character_length +POSTHOOK: type: DESCFUNCTION +character_length(str | binary) - Returns the number of characters in str or binary data +Synonyms: char_length +Example: + > SELECT character_length('HUX8�') FROM src LIMIT 1; + 5 +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFCharacterLength +Function type:BUILTIN +PREHOOK: query: DESCRIBE FUNCTION char_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION char_length +POSTHOOK: type: DESCFUNCTION +char_length(str | binary) - Returns the 
number of characters in str or binary data +PREHOOK: query: DESCRIBE FUNCTION EXTENDED char_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED char_length +POSTHOOK: type: DESCFUNCTION +char_length(str | binary) - Returns the number of characters in str or binary data +Synonyms: character_length +Example: + > SELECT char_length('HUX8�') FROM src LIMIT 1; + 5 +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFCharacterLength +Function type:BUILTIN +PREHOOK: query: CREATE TABLE dest1(len INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(len INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: character_length(value) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: 
default.dest1 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.len EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +7 +0 +7 
+6 +7 +7 +7 +7 +6 +7 +7 +7 +7 +7 +7 +0 +0 +6 +0 +7 +7 +7 +0 +0 +0 +PREHOOK: query: DROP TABLE dest1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: DROP TABLE dest1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dest1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dest1 +PREHOOK: query: INSERT INTO dest1 VALUES(NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@dest1 +POSTHOOK: query: INSERT INTO dest1 VALUES(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN SELECT character_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT character_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest1 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: character_length(name) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: SELECT 
character_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT character_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +NULL +2 +PREHOOK: query: EXPLAIN SELECT char_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT char_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest1 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: character_length(name) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: SELECT char_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT char_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +NULL +2 diff --git ql/src/test/results/clientpositive/udf_length.q.out ql/src/test/results/clientpositive/udf_length.q.out index 07d2049..fc795bb 100644 --- ql/src/test/results/clientpositive/udf_length.q.out +++ ql/src/test/results/clientpositive/udf_length.q.out @@ -11,7 +11,7 @@ length(str | binary) - Returns the length of str or number of bytes in binary da Example: > SELECT length('Facebook') FROM src LIMIT 1; 8 -Function class:org.apache.hadoop.hive.ql.udf.UDFLength +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFLength Function type:BUILTIN PREHOOK: query: CREATE TABLE dest1(len INT) PREHOOK: type: CREATETABLE diff --git ql/src/test/results/clientpositive/udf_octet_length.q.out ql/src/test/results/clientpositive/udf_octet_length.q.out new file mode 100644 index 
0000000..29fc02d --- /dev/null +++ ql/src/test/results/clientpositive/udf_octet_length.q.out @@ -0,0 +1,213 @@ +PREHOOK: query: DESCRIBE FUNCTION octet_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION octet_length +POSTHOOK: type: DESCFUNCTION +octet_length(str | binary) - Returns the number of bytes in str or binary data +PREHOOK: query: DESCRIBE FUNCTION EXTENDED octet_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED octet_length +POSTHOOK: type: DESCFUNCTION +octet_length(str | binary) - Returns the number of bytes in str or binary data +Example: + > SELECT octet_length('HUX8�') FROM src LIMIT 1; + 15 +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFOctetLength +Function type:BUILTIN +PREHOOK: query: CREATE TABLE dest1(len INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(len INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: octet_length(value) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data 
size: 191 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.len EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT 
dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +7 +0 +7 +6 +7 +7 +7 +7 +6 +7 +7 +7 +7 +7 +7 +0 +0 +6 +0 +7 +7 +7 +0 +0 +0 +PREHOOK: query: DROP TABLE dest1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: DROP TABLE dest1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dest1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dest1 +PREHOOK: query: INSERT INTO dest1 VALUES(NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@dest1 +POSTHOOK: query: INSERT INTO dest1 VALUES(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN SELECT octet_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT octet_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + TableScan + alias: dest1 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE 
Column stats: NONE + Select Operator + expressions: octet_length(name) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + ListSink + +PREHOOK: query: SELECT octet_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT octet_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +NULL +6 diff --git ql/src/test/results/clientpositive/vector_udf_character_length.q.out ql/src/test/results/clientpositive/vector_udf_character_length.q.out new file mode 100644 index 0000000..38f22e2 --- /dev/null +++ ql/src/test/results/clientpositive/vector_udf_character_length.q.out @@ -0,0 +1,285 @@ +PREHOOK: query: DESCRIBE FUNCTION character_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION character_length +POSTHOOK: type: DESCFUNCTION +character_length(str | binary) - Returns the number of characters in str or binary data +PREHOOK: query: DESCRIBE FUNCTION EXTENDED character_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED character_length +POSTHOOK: type: DESCFUNCTION +character_length(str | binary) - Returns the number of characters in str or binary data +Synonyms: char_length +Example: + > SELECT character_length('HUX8�') FROM src LIMIT 1; + 5 +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFCharacterLength +Function type:BUILTIN +PREHOOK: query: DESCRIBE FUNCTION char_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION char_length +POSTHOOK: type: DESCFUNCTION +char_length(str | binary) - Returns the number of characters in str or binary data +PREHOOK: query: DESCRIBE FUNCTION EXTENDED char_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED char_length +POSTHOOK: type: DESCFUNCTION +char_length(str | binary) - Returns the number of 
characters in str or binary data +Synonyms: character_length +Example: + > SELECT char_length('HUX8�') FROM src LIMIT 1; + 5 +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFCharacterLength +Function type:BUILTIN +PREHOOK: query: CREATE TABLE dest1(len INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(len INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: character_length(value) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.len EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +7 +0 +7 +6 +7 +7 +7 +7 +6 +7 +7 +7 +7 +7 +7 +0 +0 +6 +0 +7 +7 +7 +0 +0 +0 +PREHOOK: query: DROP TABLE dest1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: DROP TABLE dest1 +POSTHOOK: type: 
DROPTABLE +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dest1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dest1 +PREHOOK: query: INSERT INTO dest1 VALUES(NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@dest1 +POSTHOOK: query: INSERT INTO dest1 VALUES(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN SELECT character_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT character_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: dest1 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: character_length(name) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT character_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT character_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +NULL +2 +PREHOOK: query: EXPLAIN SELECT char_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT char_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: dest1 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: character_length(name) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT char_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT char_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +NULL +2 diff --git ql/src/test/results/clientpositive/vector_udf_octet_length.q.out ql/src/test/results/clientpositive/vector_udf_octet_length.q.out new 
file mode 100644 index 0000000..eb436c7 --- /dev/null +++ ql/src/test/results/clientpositive/vector_udf_octet_length.q.out @@ -0,0 +1,225 @@ +PREHOOK: query: DESCRIBE FUNCTION octet_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION octet_length +POSTHOOK: type: DESCFUNCTION +octet_length(str | binary) - Returns the number of bytes in str or binary data +PREHOOK: query: DESCRIBE FUNCTION EXTENDED octet_length +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED octet_length +POSTHOOK: type: DESCFUNCTION +octet_length(str | binary) - Returns the number of bytes in str or binary data +Example: + > SELECT octet_length('HUX8�') FROM src LIMIT 1; + 15 +Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFOctetLength +Function type:BUILTIN +PREHOOK: query: CREATE TABLE dest1(len INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(len INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: src1 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: octet_length(value) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src1 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.len EXPRESSION [(src1)src1.FieldSchema(name:value, type:string, 
comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +7 +0 +7 +6 +7 +7 +7 +7 +6 +7 +7 +7 +7 +7 +7 +0 +0 +6 +0 +7 +7 +7 +0 +0 +0 +PREHOOK: query: DROP TABLE dest1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest1 +PREHOOK: Output: default@dest1 +POSTHOOK: query: DROP TABLE dest1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest1 +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(name STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@dest1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv4.txt' INTO TABLE dest1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@dest1 +PREHOOK: query: INSERT INTO dest1 VALUES(NULL) +PREHOOK: type: QUERY +PREHOOK: Output: default@dest1 +POSTHOOK: query: INSERT INTO dest1 VALUES(NULL) +POSTHOOK: type: QUERY +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: EXPLAIN SELECT octet_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT octet_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: 
dest1 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: octet_length(name) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT octet_length(dest1.name) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT octet_length(dest1.name) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +NULL +6