diff --git common/src/java/org/apache/hadoop/hive/common/type/HiveBaseChar.java common/src/java/org/apache/hadoop/hive/common/type/HiveBaseChar.java
new file mode 100644
index 0000000..b230410
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/type/HiveBaseChar.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import org.apache.commons.lang.StringUtils;
+
+public abstract class HiveBaseChar {
+  protected String value;
+  protected int characterLength = -1;
+
+  protected HiveBaseChar() {
+  }
+
+  /**
+   * Sets the string value to a new value, obeying the max length defined for this object.
+   * @param val new value
+   * @param maxLength maximum number of characters allowed, or a negative value for no limit
+   */
+  public void setValue(String val, int maxLength) {
+    characterLength = -1;
+    value = HiveBaseChar.enforceMaxLength(val, maxLength);
+  }
+
+  public void setValue(HiveBaseChar val, int maxLength) {
+    if ((maxLength < 0)
+        || (val.characterLength > 0 && val.characterLength <= maxLength)) {
+      // No length enforcement required, or the source length does not exceed the max length.
+      // We can copy the source value as-is.
+      value = val.value;
+      this.characterLength = val.characterLength;
+    } else {
+      setValue(val.value, maxLength);
+    }
+  }
+
+  public static String enforceMaxLength(String val, int maxLength) {
+    String value = val;
+
+    if (maxLength > 0) {
+      int valLength = val.codePointCount(0, val.length());
+      if (valLength > maxLength) {
+        // Truncate the excess characters to fit the max character length.
+        // Also make sure we take supplementary chars into account.
+        value = val.substring(0, val.offsetByCodePoints(0, maxLength));
+      }
+    }
+    return value;
+  }
+
+  public String getValue() {
+    return value;
+  }
+
+  public int getCharacterLength() {
+    if (characterLength < 0) {
+      characterLength = value.codePointCount(0, value.length());
+    }
+    return characterLength;
+  }
+}
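A quick illustration of the code-point handling above (an editorial sketch against the HiveBaseChar API in this patch, not part of the patch itself; the supplementary character is arbitrary):

    // U+1F600 is one code point but two UTF-16 code units (a surrogate pair).
    String s = "ab\uD83D\uDE00cd";
    assert s.length() == 6;                        // UTF-16 code units
    assert s.codePointCount(0, s.length()) == 5;   // logical characters
    // enforceMaxLength() truncates by code points, so the pair stays intact:
    assert HiveBaseChar.enforceMaxLength(s, 3).equals("ab\uD83D\uDE00");
    assert HiveBaseChar.enforceMaxLength(s, 10) == s;  // within limit: returned as-is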
diff --git common/src/java/org/apache/hadoop/hive/common/type/HiveVarchar.java common/src/java/org/apache/hadoop/hive/common/type/HiveVarchar.java
new file mode 100644
index 0000000..36c6879
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/type/HiveVarchar.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+/**
+ *
+ * HiveVarchar.
+ * String wrapper to support SQL VARCHAR features.
+ * Max string length is enforced.
+ *
+ */
+public class HiveVarchar extends HiveBaseChar
+  implements Comparable<HiveVarchar> {
+
+  public static final int MAX_VARCHAR_LENGTH = 65535;
+
+  public HiveVarchar() {
+  }
+
+  public HiveVarchar(String val, int len) {
+    setValue(val, len);
+  }
+
+  public HiveVarchar(HiveVarchar hc, int len) {
+    setValue(hc, len);
+  }
+
+  /**
+   * Set the new value.
+   */
+  public void setValue(String val) {
+    super.setValue(val, -1);
+  }
+
+  public void setValue(HiveVarchar hc) {
+    super.setValue(hc.getValue(), -1);
+  }
+
+  @Override
+  public String toString() {
+    return getValue();
+  }
+
+  public int compareTo(HiveVarchar rhs) {
+    if (rhs == this) {
+      return 0;
+    }
+    return this.getValue().compareTo(rhs.getValue());
+  }
+
+  public boolean equals(HiveVarchar rhs) {
+    if (rhs == this) {
+      return true;
+    }
+    return this.getValue().equals(rhs.getValue());
+  }
+
+  @Override
+  public int hashCode() {
+    return getValue().hashCode();
+  }
+}
diff --git common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java
new file mode 100644
index 0000000..6f9b0bb
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.hadoop.hive.common.type; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.LogUtils; +import org.apache.hadoop.hive.common.LogUtils.LogInitializationException; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Random; + + +public class TestHiveVarchar extends TestCase { + public TestHiveVarchar() { + super(); + } + + static Random rnd = new Random(); + + public static int getRandomSupplementaryChar() { + int lowSurrogate = 0xDC00 + rnd.nextInt(1024); + //return 0xD8000000 + lowSurrogate; + int highSurrogate = 0xD800; + return Character.toCodePoint((char)highSurrogate, (char)lowSurrogate); + } + + public static int getRandomCodePoint() { + int codePoint; + if (rnd.nextDouble() < 0.50) { + codePoint = 32 + rnd.nextInt(90); + } else { + codePoint = getRandomSupplementaryChar(); + } + if (!Character.isValidCodePoint(codePoint)) { + System.out.println(Integer.toHexString(codePoint) + " is not a valid code point"); + } + return codePoint; + } + + public static int getRandomCodePoint(int excludeChar) { + while (true) { + int codePoint = getRandomCodePoint(); + if (codePoint != excludeChar) { + return codePoint; + } + } + } + + public void testStringLength() throws Exception { + int strLen = 20; + int[] lengths = { 15, 20, 25 }; + // Try with supplementary characters + for (int idx1 = 0; idx1 < lengths.length; ++idx1) { + // Create random test string + StringBuffer sb = new StringBuffer(); + int curLen = lengths[idx1]; + for (int idx2 = 0; idx2 < curLen; ++idx2) { + sb.appendCodePoint(getRandomCodePoint(' ')); + } + String testString = sb.toString(); + assertEquals(curLen, testString.codePointCount(0, testString.length())); + String enforcedString = HiveBaseChar.enforceMaxLength(testString, strLen); + if (curLen <= strLen) { + // No truncation needed + assertEquals(testString, enforcedString); + } else { + // String should have been truncated. + assertEquals(strLen, enforcedString.codePointCount(0, enforcedString.length())); + } + } + + // Try with ascii chars + String[] testStrings = { + "abcdefg", + "abcdefghijklmnopqrst", + "abcdefghijklmnopqrstuvwxyz" + }; + for (String testString : testStrings) { + int curLen = testString.length(); + assertEquals(curLen, testString.codePointCount(0, testString.length())); + String enforcedString = HiveBaseChar.enforceMaxLength(testString, strLen); + if (curLen <= strLen) { + // No truncation needed + assertEquals(testString, enforcedString); + } else { + // String should have been truncated. 
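+        // (Editorial note, not part of the original patch: the length check below is
+        // in code points, matching how enforceMaxLength() measures characters.)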
+ assertEquals(strLen, enforcedString.codePointCount(0, enforcedString.length())); + } + } + } + + public void testComparison() throws Exception { + HiveVarchar hc1 = new HiveVarchar("abcd", 20); + HiveVarchar hc2 = new HiveVarchar("abcd", 20); + + // Identical strings should be equal + assertTrue(hc1.equals(hc2)); + assertTrue(hc2.equals(hc1)); + assertEquals(0, hc1.compareTo(hc2)); + assertEquals(0, hc2.compareTo(hc1)); + + // Unequal strings + hc2 = new HiveVarchar("abcde", 20); + assertFalse(hc1.equals(hc2)); + assertFalse(hc2.equals(hc1)); + assertFalse(0 == hc1.compareTo(hc2)); + assertFalse(0 == hc2.compareTo(hc1)); + + // Trailing spaces are significant + hc2 = new HiveVarchar("abcd ", 30); + + assertFalse(hc1.equals(hc2)); + assertFalse(hc2.equals(hc1)); + assertFalse(0 == hc1.compareTo(hc2)); + assertFalse(0 == hc2.compareTo(hc1)); + + // Leading spaces are significant + hc2 = new HiveVarchar(" abcd", 20); + assertFalse(hc1.equals(hc2)); + assertFalse(hc2.equals(hc1)); + assertFalse(0 == hc1.compareTo(hc2)); + assertFalse(0 == hc2.compareTo(hc1)); + } +} diff --git data/files/datatypes.txt data/files/datatypes.txt index aabdb6e..10daa1b 100644 --- data/files/datatypes.txt +++ data/files/datatypes.txt @@ -1,3 +1,3 @@ -\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N --1false-1.1\N\N\N-1-1-1.0-1\N\N\N\N\N\N -1true1.11121x2ykva92.2111.01abcd1111213142212212x1abcd22012-04-22 09:00:00.123456789123456789.0123456YWJjZA==2013-01-01 +\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N +-1false-1.1\N\N\N-1-1-1.0-1\N\N\N\N\N\N\N +1true1.11121x2ykva92.2111.01abcd1111213142212212x1abcd22012-04-22 09:00:00.123456789123456789.0123456YWJjZA==2013-01-01abc123 diff --git data/files/vc1.txt data/files/vc1.txt new file mode 100644 index 0000000..d0f9952 --- /dev/null +++ data/files/vc1.txt @@ -0,0 +1,3 @@ +1abc +2abc +3 abc diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 074a203..173484d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -39,6 +39,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -159,6 +160,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.ReflectionUtils; import org.w3c.dom.Document; @@ -361,6 +363,8 @@ GenericUDFToBinary.class); registerGenericUDF(serdeConstants.DECIMAL_TYPE_NAME, GenericUDFToDecimal.class); + registerGenericUDF(serdeConstants.VARCHAR_TYPE_NAME, + GenericUDFToVarchar.class); // Aggregate functions registerGenericUDAF("max", new GenericUDAFMax()); @@ -626,6 +630,17 @@ static void registerNumericType(PrimitiveCategory primitiveCategory, int level) registerNumericType(PrimitiveCategory.STRING, 8); } + static int getCommonLength(int aLen, int bLen) { + int maxLength; + if (aLen < 0 || bLen < 0) { + // negative length should take precedence over positive value? 
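+      // (Editorial note, not part of the original patch: the intent is that an
+      // unbounded/unparameterized length wins, e.g. getCommonLength(10, 20) == 20
+      // but getCommonLength(10, -1) == -1.)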
+ maxLength = -1; + } else { + maxLength = Math.max(aLen, bLen); + } + return maxLength; + } + /** * Given 2 TypeInfo types and the PrimitiveCategory selected as the common class between the two, * return a TypeInfo corresponding to the common PrimitiveCategory, and with type qualifiers @@ -643,6 +658,16 @@ public static TypeInfo getTypeInfoForPrimitiveCategory( // For types with parameters (like varchar), we need to determine the type parameters // that should be added to this type, based on the original 2 TypeInfos. switch (typeCategory) { + case VARCHAR: + int maxLength = getCommonLength( + TypeInfoUtils.getCharacterLengthForType(a), + TypeInfoUtils.getCharacterLengthForType(b)); + VarcharTypeParams varcharParams = new VarcharTypeParams(); + varcharParams.setLength(maxLength); + // Generate type name so that we can retrieve the TypeInfo for that type. + String typeName = PrimitiveObjectInspectorUtils + .getTypeEntryFromTypeSpecs(typeCategory, varcharParams).toString(); + return TypeInfoFactory.getPrimitiveTypeInfo(typeName); default: // Type doesn't require any qualifiers. @@ -840,7 +865,6 @@ public static boolean implicitConvertable(TypeInfo from, TypeInfo to) { return false; } - /** * Get the GenericUDAF evaluator for the name and argumentClasses. * @@ -1019,7 +1043,8 @@ public static Object invoke(Method m, Object thisObject, Object... arguments) */ public static int matchCost(TypeInfo argumentPassed, TypeInfo argumentAccepted, boolean exact) { - if (argumentAccepted.equals(argumentPassed)) { + if (argumentAccepted.equals(argumentPassed) + || TypeInfoUtils.doPrimitiveCategoriesMatch(argumentPassed, argumentAccepted)) { // matches return 0; } @@ -1468,6 +1493,7 @@ private static boolean isOpCast(ExprNodeDesc desc) { udfClass == UDFToDouble.class || udfClass == UDFToFloat.class || udfClass == UDFToInteger.class || udfClass == UDFToLong.class || udfClass == UDFToShort.class || udfClass == UDFToString.class || + udfClass == GenericUDFToVarchar.class || udfClass == GenericUDFTimestamp.class || udfClass == GenericUDFToBinary.class || udfClass == GenericUDFToDate.class; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index fa2c1e2..a7cc4b9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -629,7 +629,7 @@ protected static String getTypeStringFromAST(ASTNode typeNode) case HiveParser.TOK_UNIONTYPE: return getUnionTypeStringFromAST(typeNode); default: - return DDLSemanticAnalyzer.getTypeName(typeNode.getType()); + return DDLSemanticAnalyzer.getTypeName(typeNode); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 932ec66..8f5a99d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -38,6 +38,7 @@ import org.antlr.runtime.tree.CommonTree; import org.antlr.runtime.tree.Tree; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; @@ -127,6 +128,10 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import 
org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.TextInputFormat; @@ -148,6 +153,7 @@ TokenToTypeName.put(HiveParser.TOK_FLOAT, serdeConstants.FLOAT_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_DOUBLE, serdeConstants.DOUBLE_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_STRING, serdeConstants.STRING_TYPE_NAME); + TokenToTypeName.put(HiveParser.TOK_VARCHAR, serdeConstants.VARCHAR_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_BINARY, serdeConstants.BINARY_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_DATE, serdeConstants.DATE_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_DATETIME, serdeConstants.DATETIME_TYPE_NAME); @@ -155,12 +161,27 @@ TokenToTypeName.put(HiveParser.TOK_DECIMAL, serdeConstants.DECIMAL_TYPE_NAME); } - public static String getTypeName(int token) throws SemanticException { + public static String getTypeName(ASTNode node) throws SemanticException { + int token = node.getType(); + String typeName; + // datetime type isn't currently supported if (token == HiveParser.TOK_DATETIME) { throw new SemanticException(ErrorMsg.UNSUPPORTED_TYPE.getMsg()); } - return TokenToTypeName.get(token); + + switch (token) { + case HiveParser.TOK_VARCHAR: + PrimitiveCategory primitiveCategory = PrimitiveCategory.VARCHAR; + typeName = TokenToTypeName.get(token); + VarcharTypeParams varcharParams = ParseUtils.getVarcharParams(typeName, node); + typeName = PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs( + primitiveCategory, varcharParams).toString(); + break; + default: + typeName = TokenToTypeName.get(token); + } + return typeName; } static class TablePartition { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index ff65c8f..ca667d4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -105,6 +105,7 @@ KW_DATETIME: 'DATETIME'; KW_TIMESTAMP: 'TIMESTAMP'; KW_DECIMAL: 'DECIMAL'; KW_STRING: 'STRING'; +KW_VARCHAR: 'VARCHAR'; KW_ARRAY: 'ARRAY'; KW_STRUCT: 'STRUCT'; KW_MAP: 'MAP'; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 36d62a6..b7b18da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -110,6 +110,7 @@ TOK_DATELITERAL; TOK_DATETIME; TOK_TIMESTAMP; TOK_STRING; +TOK_VARCHAR; TOK_BINARY; TOK_DECIMAL; TOK_LIST; @@ -1771,6 +1772,7 @@ primitiveType | KW_STRING -> TOK_STRING | KW_BINARY -> TOK_BINARY | KW_DECIMAL -> TOK_DECIMAL + | KW_VARCHAR LPAREN length=Number RPAREN -> ^(TOK_VARCHAR $length) ; listType diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index 9e15d27..c8c5f63 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -20,14 +20,17 @@ import java.util.ArrayList; import java.util.Iterator; +import java.util.LinkedList; import java.util.List; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.ErrorMsg; import 
org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; /** @@ -116,7 +119,9 @@ static ExprNodeDesc createConversionCast(ExprNodeDesc column, PrimitiveTypeInfo typeParams = tableFieldTypeInfo.getTypeParams(); if (typeParams != null) { switch (tableFieldTypeInfo.getPrimitiveCategory()) { - // No parameterized types yet + case VARCHAR: + // Nothing to do here - the parameter will be passed to the UDF factory method below + break; default: throw new SemanticException("Type cast for " + tableFieldTypeInfo.getPrimitiveCategory() + " does not take type parameters"); @@ -132,4 +137,22 @@ static ExprNodeDesc createConversionCast(ExprNodeDesc column, PrimitiveTypeInfo return ret; } + + public static VarcharTypeParams getVarcharParams(String typeName, ASTNode node) + throws SemanticException { + if (node.getChildCount() != 1) { + throw new SemanticException("Bad params for type " + typeName); + } + + try { + VarcharTypeParams typeParams = new VarcharTypeParams(); + String lengthStr = node.getChild(0).getText(); + Integer length = Integer.valueOf(lengthStr); + typeParams.setLength(length.intValue()); + typeParams.validateParams(); + return typeParams; + } catch (SerDeException err) { + throw new SemanticException(err); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index a912882..767f545 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -62,6 +62,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -562,6 +563,8 @@ public static ColumnExprProcessor getColumnExprProcessor() { serdeConstants.DOUBLE_TYPE_NAME); conversionFunctionTextHashMap.put(HiveParser.TOK_STRING, serdeConstants.STRING_TYPE_NAME); + conversionFunctionTextHashMap.put(HiveParser.TOK_VARCHAR, + serdeConstants.VARCHAR_TYPE_NAME); conversionFunctionTextHashMap.put(HiveParser.TOK_BINARY, serdeConstants.BINARY_TYPE_NAME); conversionFunctionTextHashMap.put(HiveParser.TOK_DATE, @@ -783,8 +786,14 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, if (isFunction) { ASTNode funcNameNode = (ASTNode)expr.getChild(0); switch (funcNameNode.getType()) { - // Get type param from AST and add to cast function. 
- // But, no parameterized types to handle at the moment + case HiveParser.TOK_VARCHAR: + // Add type params + VarcharTypeParams varcharTypeParams = new VarcharTypeParams(); + varcharTypeParams.length = Integer.valueOf((funcNameNode.getChild(0).getText())); + if (genericUDF != null) { + ((SettableUDF)genericUDF).setParams(varcharTypeParams); + } + break; default: // Do nothing break; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java index 25d04e1..c8eae5b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java @@ -25,10 +25,13 @@ import java.util.Map; import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.ql.ErrorMsg; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.HiveOutputFormat; @@ -36,7 +39,8 @@ import org.apache.hadoop.hive.ql.parse.ParseUtils; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /** * CreateTableDesc. @@ -45,6 +49,7 @@ @Explain(displayName = "Create Table") public class CreateTableDesc extends DDLDesc implements Serializable { private static final long serialVersionUID = 1L; + private static Log LOG = LogFactory.getLog(CreateTableDesc.class); String databaseName; String tableName; boolean isExternal; @@ -461,12 +466,15 @@ public void validate() while (partColsIter.hasNext()) { FieldSchema fs = partColsIter.next(); String partCol = fs.getName(); - PrimitiveObjectInspectorUtils.PrimitiveTypeEntry pte = PrimitiveObjectInspectorUtils - .getTypeEntryFromTypeName( - fs.getType()); - if(null == pte){ + TypeInfo pti = null; + try { + pti = TypeInfoFactory.getPrimitiveTypeInfo(fs.getType()); + } catch (Exception err) { + LOG.error(err); + } + if(null == pti){ throw new SemanticException(ErrorMsg.PARTITION_COLUMN_NON_PRIMITIVE.getMsg() + " Found " - + partCol + " of type: " + fs.getType()); + + partCol + " of type: " + fs.getType()); } Iterator colNamesIter = colNames.iterator(); while (colNamesIter.hasNext()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java index f4ffde5..6538add 100755 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExprNodeConstantDesc.java @@ -66,7 +66,8 @@ public ConstantObjectInspector getWritableObjectInspector() { .getPrimitiveJavaObjectInspector(pc).getPrimitiveWritableObject( getValue()); return PrimitiveObjectInspectorFactory - .getPrimitiveWritableConstantObjectInspector(pc, writableValue); + .getPrimitiveWritableConstantObjectInspector( + (PrimitiveTypeInfo) getTypeInfo(), writableValue); } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFEncode.java ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFEncode.java index 
cb519e5..2ae2a6e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFEncode.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/GenericUDFEncode.java @@ -19,6 +19,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; @@ -30,8 +32,8 @@ "is null, the result will also be null") public class GenericUDFEncode extends GenericUDF { private transient CharsetEncoder encoder = null; - private transient StringObjectInspector stringOI = null; - private transient StringObjectInspector charsetOI = null; + private transient PrimitiveObjectInspector stringOI = null; + private transient PrimitiveObjectInspector charsetOI = null; private transient BytesWritable result = new BytesWritable(); @Override @@ -41,23 +43,27 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen } if (arguments[0].getCategory() != Category.PRIMITIVE || - ((PrimitiveObjectInspector)arguments[0]).getPrimitiveCategory() != PrimitiveCategory.STRING){ - throw new UDFArgumentTypeException(0, "The first argument to Encode() must be a string"); + PrimitiveGrouping.STRING_GROUP != PrimitiveObjectInspectorUtils.getPrimitiveGrouping( + ((PrimitiveObjectInspector)arguments[0]).getPrimitiveCategory())){ + throw new UDFArgumentTypeException( + 0, "The first argument to Encode() must be a string/varchar"); } - stringOI = (StringObjectInspector) arguments[0]; + stringOI = (PrimitiveObjectInspector) arguments[0]; if (arguments[1].getCategory() != Category.PRIMITIVE || - ((PrimitiveObjectInspector)arguments[1]).getPrimitiveCategory() != PrimitiveCategory.STRING){ - throw new UDFArgumentTypeException(1, "The second argument to Encode() must be a string"); + PrimitiveGrouping.STRING_GROUP != PrimitiveObjectInspectorUtils.getPrimitiveGrouping( + ((PrimitiveObjectInspector)arguments[1]).getPrimitiveCategory())){ + throw new UDFArgumentTypeException( + 1, "The second argument to Encode() must be a string/varchar"); } - charsetOI = (StringObjectInspector) arguments[1]; + charsetOI = (PrimitiveObjectInspector) arguments[1]; // If the character set for encoding is constant, we can optimize that - StringObjectInspector charSetOI = (StringObjectInspector) arguments[1]; - if (charSetOI instanceof ConstantObjectInspector){ - String charSetName = ((Text) ((ConstantObjectInspector) charSetOI).getWritableConstantValue()).toString(); + if (charsetOI instanceof ConstantObjectInspector){ + String charSetName = + ((ConstantObjectInspector) arguments[1]).getWritableConstantValue().toString(); encoder = Charset.forName(charSetName).newEncoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT); } @@ -68,7 +74,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { - String value = stringOI.getPrimitiveJavaObject(arguments[0].get()); + String value = 
PrimitiveObjectInspectorUtils.getString(arguments[0].get(), stringOI); if (value == null) { return null; } @@ -81,7 +87,8 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { throw new HiveException(e); } } else { - encoded = Charset.forName(charsetOI.getPrimitiveJavaObject(arguments[1].get())).encode(value); + encoded = Charset.forName( + PrimitiveObjectInspectorUtils.getString(arguments[1].get(), charsetOI)).encode(value); } result.setSize(encoded.limit()); encoded.get(result.getBytes(), 0, encoded.limit()); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java index 17a79f0..6a4272a 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java index 23a3c0c..63110bb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java @@ -83,6 +83,7 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) case DOUBLE: return new GenericUDAFDoubleStatsEvaluator(); case STRING: + case VARCHAR: return new GenericUDAFStringStatsEvaluator(); case BINARY: return new GenericUDAFBinaryStatsEvaluator(); @@ -102,12 +103,12 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) /* Object Inspector corresponding to the input parameter. */ - private PrimitiveObjectInspector inputOI; + private transient PrimitiveObjectInspector inputOI; /* Partial aggregation result returned by TerminatePartial. Partial result is a struct * containing a long field named "count". */ - private Object[] partialResult; + private transient Object[] partialResult; /* Object Inspectors corresponding to the struct returned by TerminatePartial and the long * field within the struct - "count" @@ -115,17 +116,17 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) private transient StructObjectInspector soi; private transient StructField countTruesField; - private WritableLongObjectInspector countTruesFieldOI; + private transient WritableLongObjectInspector countTruesFieldOI; private transient StructField countFalsesField; - private WritableLongObjectInspector countFalsesFieldOI; + private transient WritableLongObjectInspector countFalsesFieldOI; private transient StructField countNullsField; - private WritableLongObjectInspector countNullsFieldOI; + private transient WritableLongObjectInspector countNullsFieldOI; /* Output of final result of the aggregation */ - private Object[] result; + private transient Object[] result; @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) @@ -305,13 +306,13 @@ public Object terminate(AggregationBuffer agg) throws HiveException { /* Object Inspector corresponding to the input parameter. 
*/ - private PrimitiveObjectInspector inputOI; + private transient PrimitiveObjectInspector inputOI; private transient PrimitiveObjectInspector numVectorsOI; /* Partial aggregation result returned by TerminatePartial. Partial result is a struct * containing a long field named "count". */ - private Object[] partialResult; + private transient Object[] partialResult; /* Object Inspectors corresponding to the struct returned by TerminatePartial and the long * field within the struct - "count" @@ -319,23 +320,23 @@ public Object terminate(AggregationBuffer agg) throws HiveException { private transient StructObjectInspector soi; private transient StructField minField; - private WritableLongObjectInspector minFieldOI; + private transient WritableLongObjectInspector minFieldOI; private transient StructField maxField; - private WritableLongObjectInspector maxFieldOI; + private transient WritableLongObjectInspector maxFieldOI; private transient StructField countNullsField; - private WritableLongObjectInspector countNullsFieldOI; + private transient WritableLongObjectInspector countNullsFieldOI; private transient StructField ndvField; - private WritableStringObjectInspector ndvFieldOI; + private transient WritableStringObjectInspector ndvFieldOI; private transient StructField numBitVectorsField; - private WritableIntObjectInspector numBitVectorsFieldOI; + private transient WritableIntObjectInspector numBitVectorsFieldOI; /* Output of final result of the aggregation */ - private Object[] result; + private transient Object[] result; @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { @@ -601,13 +602,13 @@ public Object terminate(AggregationBuffer agg) throws HiveException { /* Object Inspector corresponding to the input parameter. */ - private PrimitiveObjectInspector inputOI; + private transient PrimitiveObjectInspector inputOI; private transient PrimitiveObjectInspector numVectorsOI; /* Partial aggregation result returned by TerminatePartial. Partial result is a struct * containing a long field named "count". 
*/ - private Object[] partialResult; + private transient Object[] partialResult; /* Object Inspectors corresponding to the struct returned by TerminatePartial and the long * field within the struct - "count" @@ -615,23 +616,23 @@ public Object terminate(AggregationBuffer agg) throws HiveException { private transient StructObjectInspector soi; private transient StructField minField; - private WritableDoubleObjectInspector minFieldOI; + private transient WritableDoubleObjectInspector minFieldOI; private transient StructField maxField; - private WritableDoubleObjectInspector maxFieldOI; + private transient WritableDoubleObjectInspector maxFieldOI; private transient StructField countNullsField; - private WritableLongObjectInspector countNullsFieldOI; + private transient WritableLongObjectInspector countNullsFieldOI; private transient StructField ndvField; - private WritableStringObjectInspector ndvFieldOI; + private transient WritableStringObjectInspector ndvFieldOI; private transient StructField numBitVectorsField; - private WritableIntObjectInspector numBitVectorsFieldOI; + private transient WritableIntObjectInspector numBitVectorsFieldOI; /* Output of final result of the aggregation */ - private Object[] result; + private transient Object[] result; @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { @@ -898,13 +899,13 @@ public Object terminate(AggregationBuffer agg) throws HiveException { /* Object Inspector corresponding to the input parameter. */ - private PrimitiveObjectInspector inputOI; + private transient PrimitiveObjectInspector inputOI; private transient PrimitiveObjectInspector numVectorsOI; /* Partial aggregation result returned by TerminatePartial. Partial result is a struct * containing a long field named "count". */ - private Object[] partialResult; + private transient Object[] partialResult; /* Object Inspectors corresponding to the struct returned by TerminatePartial and the * fields within the struct - "maxLength", "sumLength", "count", "countNulls", "ndv" @@ -912,26 +913,26 @@ public Object terminate(AggregationBuffer agg) throws HiveException { private transient StructObjectInspector soi; private transient StructField maxLengthField; - private WritableLongObjectInspector maxLengthFieldOI; + private transient WritableLongObjectInspector maxLengthFieldOI; private transient StructField sumLengthField; - private WritableLongObjectInspector sumLengthFieldOI; + private transient WritableLongObjectInspector sumLengthFieldOI; private transient StructField countField; - private WritableLongObjectInspector countFieldOI; + private transient WritableLongObjectInspector countFieldOI; private transient StructField countNullsField; - private WritableLongObjectInspector countNullsFieldOI; + private transient WritableLongObjectInspector countNullsFieldOI; private transient StructField ndvField; - private WritableStringObjectInspector ndvFieldOI; + private transient WritableStringObjectInspector ndvFieldOI; private transient StructField numBitVectorsField; - private WritableIntObjectInspector numBitVectorsFieldOI; + private transient WritableIntObjectInspector numBitVectorsFieldOI; /* Output of final result of the aggregation */ - private Object[] result; + private transient Object[] result; @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { @@ -1217,12 +1218,12 @@ public Object terminate(AggregationBuffer agg) throws HiveException { /* Object Inspector corresponding to the input parameter. 
*/ - private PrimitiveObjectInspector inputOI; + private transient PrimitiveObjectInspector inputOI; /* Partial aggregation result returned by TerminatePartial. Partial result is a struct * containing a long field named "count". */ - private Object[] partialResult; + private transient Object[] partialResult; /* Object Inspectors corresponding to the struct returned by TerminatePartial and the * fields within the struct - "maxLength", "sumLength", "count", "countNulls" @@ -1230,20 +1231,20 @@ public Object terminate(AggregationBuffer agg) throws HiveException { private transient StructObjectInspector soi; private transient StructField maxLengthField; - private WritableLongObjectInspector maxLengthFieldOI; + private transient WritableLongObjectInspector maxLengthFieldOI; private transient StructField sumLengthField; - private WritableLongObjectInspector sumLengthFieldOI; + private transient WritableLongObjectInspector sumLengthFieldOI; private transient StructField countField; - private WritableLongObjectInspector countFieldOI; + private transient WritableLongObjectInspector countFieldOI; private transient StructField countNullsField; - private WritableLongObjectInspector countNullsFieldOI; + private transient WritableLongObjectInspector countNullsFieldOI; /* Output of final result of the aggregation */ - private Object[] result; + private transient Object[] result; @Override public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java index a05b277..5c00d36 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java @@ -133,9 +133,14 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen TypeInfo oiTypeInfo0 = TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[0]); TypeInfo oiTypeInfo1 = TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[1]); - if (oiTypeInfo0 != oiTypeInfo1) { + if (oiTypeInfo0 == oiTypeInfo1 + || TypeInfoUtils.doPrimitiveCategoriesMatch(oiTypeInfo0, oiTypeInfo1)) { + compareType = CompareType.SAME_TYPE; + } else { compareType = CompareType.NEED_CONVERT; - TypeInfo compareType = FunctionRegistry.getCommonClassForComparison(oiTypeInfo0, oiTypeInfo1); + TypeInfo compareType = FunctionRegistry.getCommonClassForComparison( + oiTypeInfo0, oiTypeInfo1); + // For now, we always convert to double if we can't find a common type compareOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( (compareType == null) ? 
@@ -143,8 +148,6 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen converter0 = ObjectInspectorConverters.getConverter(arguments[0], compareOI); converter1 = ObjectInspectorConverters.getConverter(arguments[1], compareOI); - } else { - compareType = CompareType.SAME_TYPE; } } return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java index feb9215..8763942 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcatWS.java @@ -27,7 +27,11 @@ import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.io.Text; @@ -61,15 +65,12 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen for (int i = 0; i < arguments.length; i++) { switch(arguments[i].getCategory()) { case LIST: - if (((ListObjectInspector)arguments[i]).getListElementObjectInspector() - .getTypeName().equals(serdeConstants.STRING_TYPE_NAME) - || ((ListObjectInspector)arguments[i]).getListElementObjectInspector() - .getTypeName().equals(serdeConstants.VOID_TYPE_NAME)) { - break; + if (isStringOrVoidType( + ((ListObjectInspector) arguments[i]).getListElementObjectInspector())) { + break; } case PRIMITIVE: - if (arguments[i].getTypeName().equals(serdeConstants.STRING_TYPE_NAME) - || arguments[i].getTypeName().equals(serdeConstants.VOID_TYPE_NAME)) { + if (isStringOrVoidType(arguments[i])) { break; } default: @@ -84,6 +85,18 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen return PrimitiveObjectInspectorFactory.writableStringObjectInspector; } + protected boolean isStringOrVoidType(ObjectInspector oi) { + if (oi.getCategory() == Category.PRIMITIVE) { + if (PrimitiveGrouping.STRING_GROUP + == PrimitiveObjectInspectorUtils.getPrimitiveGrouping( + ((PrimitiveObjectInspector) oi).getPrimitiveCategory()) + || ((PrimitiveObjectInspector) oi).getPrimitiveCategory() == PrimitiveCategory.VOID) { + return true; + } + } + return false; + } + private final Text resultText = new Text(); @Override @@ -91,8 +104,8 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { if (arguments[0].get() == null) { return null; } - String separator = ((StringObjectInspector) argumentOIs[0]) - .getPrimitiveJavaObject(arguments[0].get()); + String separator = PrimitiveObjectInspectorUtils.getString( + arguments[0].get(), (PrimitiveObjectInspector)argumentOIs[0]); StringBuilder sb = new StringBuilder(); boolean first = true; @@ -116,8 +129,8 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { 
sb.append(strArrayOI.getListElement(strArray, j)); } } else { - sb.append(((StringObjectInspector) argumentOIs[i]) - .getPrimitiveJavaObject(arguments[i].get())); + sb.append(PrimitiveObjectInspectorUtils.getString( + arguments[i].get(), (PrimitiveObjectInspector)argumentOIs[i])); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java index 033ee1f..5ba2ec5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; @@ -93,8 +94,12 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen try { method = findMethod(targetClass, methodName.toString(), null, true); + // While getTypeFor() returns a TypeEntry, we won't actually be able to get any + // type parameter information from this since the TypeEntry is derived from a return type. + PrimitiveTypeEntry typeEntry = getTypeFor(method.getReturnType()); returnOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - getTypeFor(method.getReturnType()).primitiveCategory); + PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs( + typeEntry.primitiveCategory, typeEntry.typeParams)); returnObj = (Writable) returnOI.getPrimitiveWritableClass().newInstance(); } catch (Exception e) { throw new UDFArgumentException(e); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java index c24a50d..4695cd5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFStringToMap.java @@ -24,9 +24,15 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -41,30 +47,30 @@ + " delimiters are used: ',' as 
delimiter1 and '=' as delimiter2.") public class GenericUDFStringToMap extends GenericUDF { private final HashMap ret = new HashMap(); - private transient StringObjectInspector soi_text, soi_de1 = null, soi_de2 = null; + private transient Converter soi_text, soi_de1 = null, soi_de2 = null; final static String default_de1 = ","; final static String default_de2 = ":"; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (!TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[0]).equals( - TypeInfoFactory.stringTypeInfo) - || (arguments.length > 1 && - !TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[1]).equals( - TypeInfoFactory.stringTypeInfo)) - || (arguments.length > 2 && - !TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[2]).equals( - TypeInfoFactory.stringTypeInfo))) { - throw new UDFArgumentException("All argument should be string"); + for (int idx = 0; idx < Math.min(arguments.length, 3); ++idx) { + if (arguments[idx].getCategory() != Category.PRIMITIVE + || PrimitiveObjectInspectorUtils.getPrimitiveGrouping( + ((PrimitiveObjectInspector) arguments[idx]).getPrimitiveCategory()) + != PrimitiveGrouping.STRING_GROUP) { + throw new UDFArgumentException("All argument should be string/character type"); + } } - - soi_text = (StringObjectInspector) arguments[0]; + soi_text = ObjectInspectorConverters.getConverter(arguments[0], + PrimitiveObjectInspectorFactory.javaStringObjectInspector); if (arguments.length > 1) { - soi_de1 = (StringObjectInspector) arguments[1]; + soi_de1 = ObjectInspectorConverters.getConverter(arguments[1], + PrimitiveObjectInspectorFactory.javaStringObjectInspector); } if (arguments.length > 2) { - soi_de2 = (StringObjectInspector) arguments[2]; + soi_de2 = ObjectInspectorConverters.getConverter(arguments[2], + PrimitiveObjectInspectorFactory.javaStringObjectInspector); } return ObjectInspectorFactory.getStandardMapObjectInspector( @@ -75,11 +81,11 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { ret.clear(); - String text = soi_text.getPrimitiveJavaObject(arguments[0].get()); + String text = (String) soi_text.convert(arguments[0].get()); String delimiter1 = (soi_de1 == null) ? - default_de1 : soi_de1.getPrimitiveJavaObject(arguments[1].get()); + default_de1 : (String) soi_de1.convert(arguments[1].get()); String delimiter2 = (soi_de2 == null) ? 
- default_de2 : soi_de2.getPrimitiveJavaObject(arguments[2].get()); + default_de2 : (String) soi_de2.convert(arguments[2].get()); String[] keyValuePairs = text.split(delimiter1); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java index 099e3b0..9188c38 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDate.java @@ -25,6 +25,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.DateConverter; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; /** * GenericUDFToDate @@ -49,10 +51,11 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen } try { argumentOI = (PrimitiveObjectInspector) arguments[0]; - switch (argumentOI.getPrimitiveCategory()) { - case DATE: - case STRING: - case TIMESTAMP: + PrimitiveGrouping pg = + PrimitiveObjectInspectorUtils.getPrimitiveGrouping(argumentOI.getPrimitiveCategory()); + switch (pg) { + case DATE_GROUP: + case STRING_GROUP: break; default: throw new UDFArgumentException( diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java new file mode 100644 index 0000000..509a392 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.Serializable;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.SettableUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.HiveVarcharConverter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
+
+@Description(name = "varchar",
+value = "CAST(<value> as VARCHAR(length)) - Converts the argument to a varchar value.",
+extended = "Values will be truncated if the input value is too long to fit"
++ " within the varchar length.\n"
++ "Example:\n "
++ " > SELECT CAST(1234 AS varchar(10)) FROM src LIMIT 1;\n"
++ " '1234'")
+public class GenericUDFToVarchar extends GenericUDF
+  implements SettableUDF, Serializable {
+  private static final Log LOG = LogFactory.getLog(GenericUDFToVarchar.class.getName());
+  private transient PrimitiveObjectInspector argumentOI;
+  private transient HiveVarcharConverter converter;
+
+  // The varchar type parameters need to be set prior to initialization,
+  // and must be preserved when the plan is serialized to other processes.
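+  // (Editorial note, not part of the original patch: for a query such as
+  // SELECT CAST(name AS VARCHAR(10)) FROM src, TypeCheckProcFactory builds a
+  // VarcharTypeParams of length 10 and calls setParams() on this UDF before
+  // initialize() runs, so the output ObjectInspector carries the declared length.)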
+ private VarcharTypeParams typeParams; + + public GenericUDFToVarchar() { + } + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentException("VARCHAR cast requires a value argument"); + } + try { + argumentOI = (PrimitiveObjectInspector) arguments[0]; + } catch (ClassCastException e) { + throw new UDFArgumentException( + "The function VARCHAR takes only primitive types"); + } + + // Check if this UDF has been provided with type params for the output varchar type + SettableHiveVarcharObjectInspector outputOI; + if (typeParams != null) { + outputOI = (SettableHiveVarcharObjectInspector) + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs( + PrimitiveCategory.VARCHAR, typeParams)); + } else { + outputOI = PrimitiveObjectInspectorFactory.writableHiveVarcharObjectInspector; + } + + converter = new HiveVarcharConverter(argumentOI, outputOI); + return outputOI; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + Object o0 = arguments[0].get(); + if (o0 == null) { + return null; + } + + return converter.convert(o0); + } + + @Override + public String getDisplayString(String[] children) { + assert (children.length == 1); + StringBuilder sb = new StringBuilder(); + sb.append("CAST( "); + sb.append(children[0]); + sb.append(" AS VARCHAR("); + String paramsStr = ""; + if (typeParams != null) { + paramsStr = typeParams.toString(); + } + sb.append(paramsStr); + sb.append(")"); + return sb.toString(); + } + +/** + * Provide varchar type parameters for the output object inspector. + * This should be done before the UDF is initialized. + */ + @Override + public void setParams(Object typeParams) throws UDFArgumentException { + if (converter != null) { + LOG.warn("Type converter already initialized, setting type params now will not be useful"); + } + if (typeParams instanceof VarcharTypeParams) { + this.typeParams = (VarcharTypeParams)typeParams; + } else { + throw new UDFArgumentException( + "Was expecting VarcharTypeParams, instead got " + typeParams.getClass().getName()); + } + } + + @Override + public Object getParams() { + return typeParams; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java index d8052c8..6815195 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java @@ -29,17 +29,21 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.IdentityConverter; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; import org.apache.hadoop.io.Text; /** @@ -350,6 +354,69 @@ public ConversionHelper(Method m, ObjectInspector[] parameterOIs) }; /** + * Helper class for UDFs returning string/varchar/char + */ + public static class StringHelper { + + protected Object returnValue; + protected PrimitiveCategory type; + + public StringHelper(PrimitiveCategory type) throws UDFArgumentException { + this.type = type; + switch (type) { + case STRING: + returnValue = new Text(); + break; + case VARCHAR: + returnValue = new HiveVarcharWritable(); + break; + default: + throw new UDFArgumentException("Unexpected non-string type " + type); + } + } + + public Object setReturnValue(String val) throws UDFArgumentException { + if (val == null) { + return null; + } + switch (type) { + case STRING: + ((Text)returnValue).set(val); + return returnValue; + case VARCHAR: + ((HiveVarcharWritable)returnValue).set(val); + return returnValue; + default: + throw new UDFArgumentException("Bad return type " + type); + } + } + + /** + * Helper function to help GenericUDFs determine the return type + * character length for char/varchar. + * @param poi PrimitiveObjectInspector representing the type + * @return character length of the type + * @throws UDFArgumentException + */ + public static int getFixedStringSizeForType(PrimitiveObjectInspector poi) + throws UDFArgumentException { + // TODO: we can support date, int, .. any types which would have a fixed length value + switch (poi.getPrimitiveCategory()) { + case VARCHAR: + VarcharTypeParams varcharParams = null; + varcharParams = (VarcharTypeParams) poi.getTypeParams(); + if (varcharParams == null || varcharParams.length < 0) { + throw new UDFArgumentException("varchar type used without type params"); + } + return varcharParams.length; + default: + throw new UDFArgumentException("No fixed size for type " + poi.getTypeName()); + } + } + + } + + /** * Return an ordinal from an integer. 
*/ public static String getOrdinal(int i) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java index eb51fb4..50613f3 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.ql.exec; -import java.lang.reflect.Type; import java.lang.reflect.Method; import java.util.ArrayList; import java.util.LinkedList; @@ -27,6 +26,7 @@ import junit.framework.Assert; import junit.framework.TestCase; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -35,6 +35,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -44,6 +45,7 @@ public class TestUDF { public void same(DoubleWritable x, DoubleWritable y) {} public void same(HiveDecimalWritable x, HiveDecimalWritable y) {} + public void same(Text x, Text y) {} public void one(IntWritable x, HiveDecimalWritable y) {} public void one(IntWritable x, DoubleWritable y) {} public void one(IntWritable x, IntWritable y) {} @@ -57,8 +59,16 @@ public void typeaffinity2(IntWritable x) {} public void typeaffinity2(DoubleWritable x) {} } + TypeInfo varchar5; + TypeInfo varchar10; + TypeInfo maxVarchar; + @Override protected void setUp() { + String maxVarcharTypeName = "varchar(" + HiveVarchar.MAX_VARCHAR_LENGTH + ")"; + maxVarchar = TypeInfoFactory.getPrimitiveTypeInfo(maxVarcharTypeName); + varchar10 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)"); + varchar5 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(5)"); } private void implicit(TypeInfo a, TypeInfo b, boolean convertible) { @@ -72,6 +82,21 @@ public void testImplicitConversion() { implicit(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.decimalTypeInfo, true); implicit(TypeInfoFactory.dateTypeInfo, TypeInfoFactory.decimalTypeInfo, false); implicit(TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.decimalTypeInfo, false); + implicit(varchar10, TypeInfoFactory.stringTypeInfo, true); + implicit(TypeInfoFactory.stringTypeInfo, varchar10, true); + + // Try with parameterized varchar types + TypeInfo varchar10 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)"); + TypeInfo varchar20 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(20)"); + + implicit(varchar10, TypeInfoFactory.stringTypeInfo, true); + implicit(varchar20, TypeInfoFactory.stringTypeInfo, true); + implicit(TypeInfoFactory.stringTypeInfo, varchar10, true); + implicit(TypeInfoFactory.stringTypeInfo, varchar20, true); + implicit(varchar20, varchar10, true); + + implicit(TypeInfoFactory.intTypeInfo, varchar10, true); + implicit(TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo, true); } private static List getMethods(Class udfClass, String methodName) { @@ -136,8 +161,8 @@ private void verify(Class udf, String name, TypeInfo ta, TypeInfo tb, } assert(!throwException); assertEquals(2, result.getParameterTypes().length); - assertEquals(result.getParameterTypes()[0], a); - assertEquals(result.getParameterTypes()[1], 
b); + assertEquals(a, result.getParameterTypes()[0]); + assertEquals(b, result.getParameterTypes()[1]); } public void testGetMethodInternal() { @@ -166,12 +191,15 @@ public void testGetMethodInternal() { verify(TestUDF.class, "one", TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, IntWritable.class, IntWritable.class, false); + // Passing varchar arguments should prefer the version of evaluate() with Text args. + verify(TestUDF.class, "same", varchar5, varchar10, Text.class, Text.class, false); + verify(TestUDF.class, "mismatch", TypeInfoFactory.voidTypeInfo, TypeInfoFactory.intTypeInfo, null, null, true); } private void common(TypeInfo a, TypeInfo b, TypeInfo result) { - assertEquals(FunctionRegistry.getCommonClass(a,b), result); + assertEquals(result, FunctionRegistry.getCommonClass(a,b)); } public void testCommonClass() { @@ -183,10 +211,13 @@ public void testCommonClass() { TypeInfoFactory.decimalTypeInfo); common(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); + + common(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo); + common(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); } private void comparison(TypeInfo a, TypeInfo b, TypeInfo result) { - assertEquals(FunctionRegistry.getCommonClassForComparison(a,b), result); + assertEquals(result, FunctionRegistry.getCommonClassForComparison(a,b)); } public void testCommonClassComparison() { @@ -198,6 +229,61 @@ public void testCommonClassComparison() { TypeInfoFactory.decimalTypeInfo); comparison(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.doubleTypeInfo); + + comparison(TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo); + comparison(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.dateTypeInfo, + TypeInfoFactory.stringTypeInfo); + + comparison(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo); + comparison(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); + comparison(varchar5, varchar10, varchar10); + } + + /** + * Method to print out the comparison/conversion behavior for data types. 
+ */ + public void testPrintTypeCompatibility() { + if (true) { + return; + } + + String[] typeStrings = { + "void", "boolean", "tinyint", "smallint", "int", "bigint", "float", "double", + "string", "timestamp", "date", "binary", "decimal", "varchar(10)", "varchar(5)", + }; + for (String cat1 : typeStrings) { + TypeInfo ti1 = null; + try { + ti1 = TypeInfoUtils.getTypeInfoFromTypeString(cat1); + } catch (Exception err) { + System.out.println(err); + System.out.println("Unable to get TypeInfo for " + cat1 + ", skipping ..."); + continue; + } + + for (String cat2 : typeStrings) { + TypeInfo commonClass = null; + boolean implicitConvertable = false; + try { + TypeInfo ti2 = TypeInfoUtils.getTypeInfoFromTypeString(cat2); + try { + commonClass = FunctionRegistry.getCommonClassForComparison(ti1, ti2); + //implicitConvertable = FunctionRegistry.implicitConvertable(ti1, ti2); + } catch (Exception err) { + System.out.println("Failed to get common class for " + ti1 + ", " + ti2 + ": " + err); + err.printStackTrace(); + //System.out.println("Unable to get TypeInfo for " + cat2 + ", skipping ..."); + } + System.out.println(cat1 + " - " + cat2 + ": " + commonClass); + //System.out.println(cat1 + " - " + cat2 + ": " + implicitConvertable); + } catch (Exception err) { + System.out.println(err); + System.out.println("Unable to get TypeInfo for " + cat2 + ", skipping ..."); + continue; + } + } + } } private void unionAll(TypeInfo a, TypeInfo b, TypeInfo result) { @@ -213,11 +299,26 @@ public void testCommonClassUnionAll() { TypeInfoFactory.decimalTypeInfo); unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); + + unionAll(varchar5, varchar10, varchar10); + unionAll(varchar10, varchar5, varchar10); + unionAll(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); + unionAll(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo); } public void testGetTypeInfoForPrimitiveCategory() { + // varchar should take string length into account. 
+ // varchar(5), varchar(10) => varchar(10) + assertEquals(varchar10, FunctionRegistry.getTypeInfoForPrimitiveCategory( + (PrimitiveTypeInfo) varchar5, (PrimitiveTypeInfo) varchar10, PrimitiveCategory.VARCHAR)); + assertEquals(varchar10, FunctionRegistry.getTypeInfoForPrimitiveCategory( + (PrimitiveTypeInfo) varchar10, (PrimitiveTypeInfo) varchar5, PrimitiveCategory.VARCHAR)); + // non-qualified types should simply return the TypeInfo associated with that type assertEquals(TypeInfoFactory.stringTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory( + (PrimitiveTypeInfo) varchar10, (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo, + PrimitiveCategory.STRING)); + assertEquals(TypeInfoFactory.stringTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory( (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo, (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo, PrimitiveCategory.STRING)); diff --git ql/src/test/queries/clientnegative/invalid_varchar_length_1.q ql/src/test/queries/clientnegative/invalid_varchar_length_1.q new file mode 100644 index 0000000..43de018 --- /dev/null +++ ql/src/test/queries/clientnegative/invalid_varchar_length_1.q @@ -0,0 +1,2 @@ +drop table if exists invalid_varchar_length_1; +create table invalid_varchar_length_1 (c1 varchar(1000000)); diff --git ql/src/test/queries/clientnegative/invalid_varchar_length_2.q ql/src/test/queries/clientnegative/invalid_varchar_length_2.q new file mode 100644 index 0000000..3c199d3 --- /dev/null +++ ql/src/test/queries/clientnegative/invalid_varchar_length_2.q @@ -0,0 +1 @@ +select cast(value as varchar(100000)) from src limit 1; diff --git ql/src/test/queries/clientnegative/invalid_varchar_length_3.q ql/src/test/queries/clientnegative/invalid_varchar_length_3.q new file mode 100644 index 0000000..fed0476 --- /dev/null +++ ql/src/test/queries/clientnegative/invalid_varchar_length_3.q @@ -0,0 +1,3 @@ +drop table if exists invalid_varchar_length_3; +create table invalid_varchar_length_3 (c1 varchar(0)); + diff --git ql/src/test/queries/clientpositive/alter_varchar1.q ql/src/test/queries/clientpositive/alter_varchar1.q new file mode 100644 index 0000000..fa9d99e --- /dev/null +++ ql/src/test/queries/clientpositive/alter_varchar1.q @@ -0,0 +1,32 @@ +drop table alter_varchar_1; + +create table alter_varchar_1 (key string, value string); +insert overwrite table alter_varchar_1 + select key, value from src limit 5; + +select * from alter_varchar_1; + +-- change column to varchar +alter table alter_varchar_1 change column value value varchar(20); +-- contents should still look the same +select * from alter_varchar_1; + +-- change column to smaller varchar +alter table alter_varchar_1 change column value value varchar(3); +-- value column should be truncated now +select * from alter_varchar_1; + +-- change back to bigger varchar +alter table alter_varchar_1 change column value value varchar(20); +-- column values should be full size again +select * from alter_varchar_1; + +-- add varchar column +alter table alter_varchar_1 add columns (key2 int, value2 varchar(10)); +select * from alter_varchar_1; + +insert overwrite table alter_varchar_1 + select key, value, key, value from src limit 5; +select * from alter_varchar_1; + +drop table alter_varchar_1; diff --git ql/src/test/queries/clientpositive/ctas_varchar.q ql/src/test/queries/clientpositive/ctas_varchar.q new file mode 100644 index 0000000..f646b36 --- /dev/null +++ ql/src/test/queries/clientpositive/ctas_varchar.q @@ -0,0 +1,22 @@ +drop table ctas_varchar_1; +drop table 
ctas_varchar_2; +drop view ctas_varchar_3; + +create table ctas_varchar_1 (key varchar(10), value string); +insert overwrite table ctas_varchar_1 + select key, value from src sort by key, value limit 5; + +-- create table as with varchar column +create table ctas_varchar_2 as select key, value from ctas_varchar_1; + +-- view with varchar column +create view ctas_varchar_3 as select key, value from ctas_varchar_2; + +select key, value from ctas_varchar_1; +select * from ctas_varchar_2; +select * from ctas_varchar_3; + + +drop table ctas_varchar_1; +drop table ctas_varchar_2; +drop view ctas_varchar_3; diff --git ql/src/test/queries/clientpositive/partition_varchar1.q ql/src/test/queries/clientpositive/partition_varchar1.q new file mode 100644 index 0000000..d700b1c --- /dev/null +++ ql/src/test/queries/clientpositive/partition_varchar1.q @@ -0,0 +1,43 @@ +drop table partition_varchar_1; + +create table partition_varchar_1 (key string, value varchar(20)) partitioned by (dt varchar(10), region int); + +insert overwrite table partition_varchar_1 partition(dt='2000-01-01', region=1) + select * from src limit 10; +insert overwrite table partition_varchar_1 partition(dt='2000-01-01', region=2) + select * from src limit 5; +insert overwrite table partition_varchar_1 partition(dt='2013-08-08', region=1) + select * from src limit 20; +insert overwrite table partition_varchar_1 partition(dt='2013-08-08', region=10) + select * from src limit 11; + +select distinct dt from partition_varchar_1; +select * from partition_varchar_1 where dt = '2000-01-01' and region = 2 order by key,value; + +-- 15 +select count(*) from partition_varchar_1 where dt = '2000-01-01'; +-- 5 +select count(*) from partition_varchar_1 where dt = '2000-01-01' and region = 2; +-- 11 +select count(*) from partition_varchar_1 where dt = '2013-08-08' and region = 10; +-- 30 +select count(*) from partition_varchar_1 where region = 1; +-- 0 +select count(*) from partition_varchar_1 where dt = '2000-01-01' and region = 3; +-- 0 +select count(*) from partition_varchar_1 where dt = '1999-01-01'; + +-- Try other comparison operations + +-- 20 +select count(*) from partition_varchar_1 where dt > '2000-01-01' and region = 1; +-- 10 +select count(*) from partition_varchar_1 where dt < '2000-01-02' and region = 1; +-- 20 +select count(*) from partition_varchar_1 where dt >= '2000-01-02' and region = 1; +-- 10 +select count(*) from partition_varchar_1 where dt <= '2000-01-01' and region = 1; +-- 20 +select count(*) from partition_varchar_1 where dt <> '2000-01-01' and region = 1; + +drop table partition_varchar_1; diff --git ql/src/test/queries/clientpositive/varchar_1.q ql/src/test/queries/clientpositive/varchar_1.q new file mode 100644 index 0000000..bda12fe --- /dev/null +++ ql/src/test/queries/clientpositive/varchar_1.q @@ -0,0 +1,32 @@ +drop table varchar1; +drop table varchar1_1; + +create table varchar1 (key varchar(10), value varchar(20)); +create table varchar1_1 (key string, value string); + +-- load from file +load data local inpath '../data/files/srcbucket0.txt' overwrite into table varchar1; +select * from varchar1 limit 2; + +-- insert overwrite, from same/different length varchar +insert overwrite table varchar1 + select cast(key as varchar(10)), cast(value as varchar(15)) from src limit 2; +select key, value from varchar1; + +-- insert overwrite, from string +insert overwrite table varchar1 + select key, value from src limit 2; +select key, value from varchar1; + +-- insert string from varchar +insert overwrite table varchar1_1 + 
select key, value from varchar1 limit 2; +select key, value from varchar1_1; + +-- respect string length +insert overwrite table varchar1 + select key, cast(value as varchar(3)) from src limit 2; +select key, value from varchar1; + +drop table varchar1; +drop table varchar1_1; diff --git ql/src/test/queries/clientpositive/varchar_2.q ql/src/test/queries/clientpositive/varchar_2.q new file mode 100644 index 0000000..1eb3b85 --- /dev/null +++ ql/src/test/queries/clientpositive/varchar_2.q @@ -0,0 +1,36 @@ +drop table varchar_2; + +create table varchar_2 ( + key varchar(10), + value varchar(20) +); + +insert overwrite table varchar_2 select * from src; + +select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5; + +-- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from varchar_2 +group by value +order by value asc +limit 5; + +select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5; + +-- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from varchar_2 +group by value +order by value desc +limit 5; + +drop table varchar_2; diff --git ql/src/test/queries/clientpositive/varchar_cast.q ql/src/test/queries/clientpositive/varchar_cast.q new file mode 100644 index 0000000..550f3dc --- /dev/null +++ ql/src/test/queries/clientpositive/varchar_cast.q @@ -0,0 +1,84 @@ + +-- Cast from varchar to other data types +select + cast(cast('11' as string) as tinyint), + cast(cast('11' as string) as smallint), + cast(cast('11' as string) as int), + cast(cast('11' as string) as bigint), + cast(cast('11.00' as string) as float), + cast(cast('11.00' as string) as double), + cast(cast('11.00' as string) as decimal) +from src limit 1; + +select + cast(cast('11' as varchar(10)) as tinyint), + cast(cast('11' as varchar(10)) as smallint), + cast(cast('11' as varchar(10)) as int), + cast(cast('11' as varchar(10)) as bigint), + cast(cast('11.00' as varchar(10)) as float), + cast(cast('11.00' as varchar(10)) as double), + cast(cast('11.00' as varchar(10)) as decimal) +from src limit 1; + +select + cast(cast('2011-01-01' as string) as date), + cast(cast('2011-01-01 01:02:03' as string) as timestamp) +from src limit 1; + +select + cast(cast('2011-01-01' as varchar(10)) as date), + cast(cast('2011-01-01 01:02:03' as varchar(30)) as timestamp) +from src limit 1; + +-- no tests from string/varchar to boolean, that conversion doesn't look useful +select + cast(cast('abc123' as string) as string), + cast(cast('abc123' as string) as varchar(10)) +from src limit 1; + +select + cast(cast('abc123' as varchar(10)) as string), + cast(cast('abc123' as varchar(10)) as varchar(10)) +from src limit 1; + +-- cast from other types to varchar +select + cast(cast(11 as tinyint) as string), + cast(cast(11 as smallint) as string), + cast(cast(11 as int) as string), + cast(cast(11 as bigint) as string), + cast(cast(11.00 as float) as string), + cast(cast(11.00 as double) as string), + cast(cast(11.00 as decimal) as string) +from src limit 1; + +select + cast(cast(11 as tinyint) as varchar(10)), + cast(cast(11 as smallint) as varchar(10)), + cast(cast(11 as int) as varchar(10)), + cast(cast(11 as bigint) as varchar(10)), + cast(cast(11.00 as float) as varchar(10)), + cast(cast(11.00 as double) as varchar(10)), + cast(cast(11.00 as decimal) as varchar(10)) +from src limit 1; + +select + cast(date '2011-01-01' as string), + cast(timestamp('2011-01-01 01:02:03') as 
string) +from src limit 1; + +select + cast(date '2011-01-01' as varchar(10)), + cast(timestamp('2011-01-01 01:02:03') as varchar(30)) +from src limit 1; + +select + cast(true as string), + cast(false as string) +from src limit 1; + +select + cast(true as varchar(10)), + cast(false as varchar(10)) +from src limit 1; + diff --git ql/src/test/queries/clientpositive/varchar_comparison.q ql/src/test/queries/clientpositive/varchar_comparison.q new file mode 100644 index 0000000..b6c6f40 --- /dev/null +++ ql/src/test/queries/clientpositive/varchar_comparison.q @@ -0,0 +1,40 @@ + +-- Should all be true +select + cast('abc' as varchar(10)) = cast('abc' as varchar(10)), + cast('abc' as varchar(10)) <= cast('abc' as varchar(10)), + cast('abc' as varchar(10)) >= cast('abc' as varchar(10)), + cast('abc' as varchar(10)) < cast('abd' as varchar(10)), + cast('abc' as varchar(10)) > cast('abb' as varchar(10)), + cast('abc' as varchar(10)) <> cast('abb' as varchar(10)) +from src limit 1; + +-- Different varchar lengths should still compare the same +select + cast('abc' as varchar(10)) = cast('abc' as varchar(3)), + cast('abc' as varchar(10)) <= cast('abc' as varchar(3)), + cast('abc' as varchar(10)) >= cast('abc' as varchar(3)), + cast('abc' as varchar(10)) < cast('abd' as varchar(3)), + cast('abc' as varchar(10)) > cast('abb' as varchar(3)), + cast('abc' as varchar(10)) <> cast('abb' as varchar(3)) +from src limit 1; + +-- Should work with string types as well +select + cast('abc' as varchar(10)) = 'abc', + cast('abc' as varchar(10)) <= 'abc', + cast('abc' as varchar(10)) >= 'abc', + cast('abc' as varchar(10)) < 'abd', + cast('abc' as varchar(10)) > 'abb', + cast('abc' as varchar(10)) <> 'abb' +from src limit 1; + +-- leading space is significant for varchar +select + cast(' abc' as varchar(10)) <> cast('abc' as varchar(10)) +from src limit 1; + +-- trailing space is significant for varchar +select + cast('abc ' as varchar(10)) <> cast('abc' as varchar(10)) +from src limit 1; diff --git ql/src/test/queries/clientpositive/varchar_join1.q ql/src/test/queries/clientpositive/varchar_join1.q new file mode 100644 index 0000000..6a19efa --- /dev/null +++ ql/src/test/queries/clientpositive/varchar_join1.q @@ -0,0 +1,35 @@ +drop table varchar_join1_vc1; +drop table varchar_join1_vc2; +drop table varchar_join1_str; + +create table varchar_join1_vc1 ( + c1 int, + c2 varchar(10) +); + +create table varchar_join1_vc2 ( + c1 int, + c2 varchar(20) +); + +create table varchar_join1_str ( + c1 int, + c2 string +); + +load data local inpath '../data/files/vc1.txt' into table varchar_join1_vc1; +load data local inpath '../data/files/vc1.txt' into table varchar_join1_vc2; +load data local inpath '../data/files/vc1.txt' into table varchar_join1_str; + +-- Join varchar with same length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc1 b on (a.c2 = b.c2) order by a.c1; + +-- Join varchar with different length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc2 b on (a.c2 = b.c2) order by a.c1; + +-- Join varchar with string +select * from varchar_join1_vc1 a join varchar_join1_str b on (a.c2 = b.c2) order by a.c1; + +drop table varchar_join1_vc1; +drop table varchar_join1_vc2; +drop table varchar_join1_str; diff --git ql/src/test/queries/clientpositive/varchar_nested_types.q ql/src/test/queries/clientpositive/varchar_nested_types.q new file mode 100644 index 0000000..f3f89f6 --- /dev/null +++ ql/src/test/queries/clientpositive/varchar_nested_types.q @@ -0,0 +1,53 @@ +drop table varchar_nested_1; 
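+-- Illustrative aside (not part of the original test): casting to varchar(n)
+-- truncates the value to n characters, e.g.
+--   select array(cast('0123456789ABC' as varchar(10))) from src limit 1;
+-- returns ["0123456789"]. The tables below wrap varchar(20) in array, map and
+-- struct columns to check that the qualified type survives inside nested types.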
+drop table varchar_nested_array;
+drop table varchar_nested_map;
+drop table varchar_nested_struct;
+drop table varchar_nested_cta;
+drop view varchar_nested_view;
+
+create table varchar_nested_1 (key int, value varchar(20));
+insert overwrite table varchar_nested_1
+  select key, value from src limit 5;
+
+-- arrays
+create table varchar_nested_array (c1 array<varchar(20)>);
+insert overwrite table varchar_nested_array
+  select array(value, value) from varchar_nested_1;
+describe varchar_nested_array;
+select * from varchar_nested_array;
+
+-- maps
+create table varchar_nested_map (c1 map<int, varchar(20)>);
+insert overwrite table varchar_nested_map
+  select map(key, value) from varchar_nested_1;
+describe varchar_nested_map;
+select * from varchar_nested_map;
+
+-- structs
+create table varchar_nested_struct (c1 struct<a:int, b:varchar(20), c:string>);
+insert overwrite table varchar_nested_struct
+  select named_struct('a', key,
+                      'b', value,
+                      'c', cast(value as string))
+  from varchar_nested_1;
+describe varchar_nested_struct;
+select * from varchar_nested_struct;
+
+-- nested type with create table as
+create table varchar_nested_cta as
+  select * from varchar_nested_struct;
+describe varchar_nested_cta;
+select * from varchar_nested_cta;
+
+-- nested type with view
+create view varchar_nested_view as
+  select * from varchar_nested_struct;
+describe varchar_nested_view;
+select * from varchar_nested_view;
+
+drop table varchar_nested_1;
+drop table varchar_nested_array;
+drop table varchar_nested_map;
+drop table varchar_nested_struct;
+drop table varchar_nested_cta;
+drop view varchar_nested_view;
diff --git ql/src/test/queries/clientpositive/varchar_udf1.q ql/src/test/queries/clientpositive/varchar_udf1.q
new file mode 100644
index 0000000..df5b7bd
--- /dev/null
+++ ql/src/test/queries/clientpositive/varchar_udf1.q
@@ -0,0 +1,156 @@
+drop table varchar_udf_1;
+
+create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20));
+insert overwrite table varchar_udf_1
+  select key, value, key, value from src limit 5;
+
+-- UDFs with varchar support
+select
+  concat(c1, c2),
+  concat(c3, c4),
+  concat(c1, c2) = concat(c3, c4)
+from varchar_udf_1 limit 1;
+
+select
+  upper(c2),
+  upper(c4),
+  upper(c2) = upper(c4)
+from varchar_udf_1 limit 1;
+
+select
+  lower(c2),
+  lower(c4),
+  lower(c2) = lower(c4)
+from varchar_udf_1 limit 1;
+
+-- Scalar UDFs
+select
+  ascii(c2),
+  ascii(c4),
+  ascii(c2) = ascii(c4)
+from varchar_udf_1 limit 1;
+
+select
+  concat_ws('|', c1, c2),
+  concat_ws('|', c3, c4),
+  concat_ws('|', c1, c2) = concat_ws('|', c3, c4)
+from varchar_udf_1 limit 1;
+
+select
+  decode(encode(c2, 'US-ASCII'), 'US-ASCII'),
+  decode(encode(c4, 'US-ASCII'), 'US-ASCII'),
+  decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII')
+from varchar_udf_1 limit 1;
+
+select
+  instr(c2, '_'),
+  instr(c4, '_'),
+  instr(c2, '_') = instr(c4, '_')
+from varchar_udf_1 limit 1;
+
+select
+  length(c2),
+  length(c4),
+  length(c2) = length(c4)
+from varchar_udf_1 limit 1;
+
+select
+  locate('a', 'abcdabcd', 3),
+  locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3),
+  locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3)
+from varchar_udf_1 limit 1;
+
+select
+  lpad(c2, 15, ' '),
+  lpad(c4, 15, ' '),
+  lpad(c2, 15, ' ') = lpad(c4, 15, ' ')
+from varchar_udf_1 limit 1;
+
+select
+  ltrim(c2),
+  ltrim(c4),
+  ltrim(c2) = ltrim(c4)
+from varchar_udf_1 limit 1;
+
+select
+  regexp(c2, 'val'),
+  regexp(c4, 'val'),
+  regexp(c2, 'val') = regexp(c4, 'val')
+from
varchar_udf_1 limit 1; + +select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1; + +select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1; + +select + reverse(c2), + reverse(c4), + reverse(c2) = reverse(c4) +from varchar_udf_1 limit 1; + +select + rpad(c2, 15, ' '), + rpad(c4, 15, ' '), + rpad(c2, 15, ' ') = rpad(c4, 15, ' ') +from varchar_udf_1 limit 1; + +select + rtrim(c2), + rtrim(c4), + rtrim(c2) = rtrim(c4) +from varchar_udf_1 limit 1; + +select + sentences('See spot run. See jane run.'), + sentences(cast('See spot run. See jane run.' as varchar(50))) +from varchar_udf_1 limit 1; + +select + split(c2, '_'), + split(c4, '_') +from varchar_udf_1 limit 1; + +select + str_to_map('a:1,b:2,c:3',',',':'), + str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') +from varchar_udf_1 limit 1; + +select + substr(c2, 1, 3), + substr(c4, 1, 3), + substr(c2, 1, 3) = substr(c4, 1, 3) +from varchar_udf_1 limit 1; + +select + trim(c2), + trim(c4), + trim(c2) = trim(c4) +from varchar_udf_1 limit 1; + + +-- Aggregate Functions +select + compute_stats(c2, 16), + compute_stats(c4, 16) +from varchar_udf_1; + +select + min(c2), + min(c4) +from varchar_udf_1; + +select + max(c2), + max(c4) +from varchar_udf_1; + + +drop table varchar_udf_1; diff --git ql/src/test/queries/clientpositive/varchar_union1.q ql/src/test/queries/clientpositive/varchar_union1.q new file mode 100644 index 0000000..cf90eab --- /dev/null +++ ql/src/test/queries/clientpositive/varchar_union1.q @@ -0,0 +1,47 @@ +drop table varchar_union1_vc1; +drop table varchar_union1_vc2; +drop table varchar_union1_str; + +create table varchar_union1_vc1 ( + c1 int, + c2 varchar(10) +); + +create table varchar_union1_vc2 ( + c1 int, + c2 varchar(20) +); + +create table varchar_union1_str ( + c1 int, + c2 string +); + +load data local inpath '../data/files/vc1.txt' into table varchar_union1_vc1; +load data local inpath '../data/files/vc1.txt' into table varchar_union1_vc2; +load data local inpath '../data/files/vc1.txt' into table varchar_union1_str; + +-- union varchar with same length varchar +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_vc1 limit 1 +) q1 sort by c1; + +-- union varchar with different length varchar +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_vc2 limit 1 +) q1 sort by c1; + +-- union varchar with string +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_str limit 1 +) q1 sort by c1; + +drop table varchar_union1_vc1; +drop table varchar_union1_vc2; +drop table varchar_union1_str; diff --git ql/src/test/results/clientnegative/invalid_varchar_length_1.q.out ql/src/test/results/clientnegative/invalid_varchar_length_1.q.out new file mode 100644 index 0000000..707f195 --- /dev/null +++ ql/src/test/results/clientnegative/invalid_varchar_length_1.q.out @@ -0,0 +1,5 @@ +PREHOOK: query: drop table if exists invalid_varchar_length_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists invalid_varchar_length_1 +POSTHOOK: type: DROPTABLE +FAILED: SemanticException org.apache.hadoop.hive.serde2.SerDeException: Length 1000000 exceeds max varchar length of 65535 diff --git 
ql/src/test/results/clientnegative/invalid_varchar_length_2.q.out ql/src/test/results/clientnegative/invalid_varchar_length_2.q.out new file mode 100644 index 0000000..0ba4621 --- /dev/null +++ ql/src/test/results/clientnegative/invalid_varchar_length_2.q.out @@ -0,0 +1 @@ +FAILED: RuntimeException Error creating type parameters for varchar(100000): org.apache.hadoop.hive.serde2.SerDeException: Error creating type params for varchar: org.apache.hadoop.hive.serde2.SerDeException: Length 100000 exceeds max varchar length of 65535 diff --git ql/src/test/results/clientnegative/invalid_varchar_length_3.q.out ql/src/test/results/clientnegative/invalid_varchar_length_3.q.out new file mode 100644 index 0000000..4311082 --- /dev/null +++ ql/src/test/results/clientnegative/invalid_varchar_length_3.q.out @@ -0,0 +1,5 @@ +PREHOOK: query: drop table if exists invalid_varchar_length_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists invalid_varchar_length_3 +POSTHOOK: type: DROPTABLE +FAILED: SemanticException org.apache.hadoop.hive.serde2.SerDeException: VARCHAR length must be positive diff --git ql/src/test/results/clientpositive/alter_varchar1.q.out ql/src/test/results/clientpositive/alter_varchar1.q.out new file mode 100644 index 0000000..20cdf99 --- /dev/null +++ ql/src/test/results/clientpositive/alter_varchar1.q.out @@ -0,0 +1,199 @@ +PREHOOK: query: drop table alter_varchar_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table alter_varchar_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table alter_varchar_1 (key string, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table alter_varchar_1 (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@alter_varchar_1 +PREHOOK: query: insert overwrite table alter_varchar_1 + select key, value from src limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@alter_varchar_1 +POSTHOOK: query: insert overwrite table alter_varchar_1 + select key, value from src limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@alter_varchar_1 +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from alter_varchar_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_varchar_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_varchar_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_varchar_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 +86 val_86 +311 val_311 +27 val_27 +165 val_165 +PREHOOK: query: -- change column to varchar +alter table alter_varchar_1 change column value value varchar(20) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alter_varchar_1 +PREHOOK: Output: default@alter_varchar_1 +POSTHOOK: query: -- change column to varchar +alter table alter_varchar_1 change column value value varchar(20) +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alter_varchar_1 +POSTHOOK: Output: default@alter_varchar_1 +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: 
alter_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- contents should still look the same +select * from alter_varchar_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_varchar_1 +#### A masked pattern was here #### +POSTHOOK: query: -- contents should still look the same +select * from alter_varchar_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_varchar_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 +86 val_86 +311 val_311 +27 val_27 +165 val_165 +PREHOOK: query: -- change column to smaller varchar +alter table alter_varchar_1 change column value value varchar(3) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alter_varchar_1 +PREHOOK: Output: default@alter_varchar_1 +POSTHOOK: query: -- change column to smaller varchar +alter table alter_varchar_1 change column value value varchar(3) +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alter_varchar_1 +POSTHOOK: Output: default@alter_varchar_1 +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- value column should be truncated now +select * from alter_varchar_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_varchar_1 +#### A masked pattern was here #### +POSTHOOK: query: -- value column should be truncated now +select * from alter_varchar_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_varchar_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val +86 val +311 val +27 val +165 val +PREHOOK: query: -- change back to bigger varchar +alter table alter_varchar_1 change column value value varchar(20) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alter_varchar_1 +PREHOOK: Output: default@alter_varchar_1 +POSTHOOK: query: -- change back to bigger varchar +alter table alter_varchar_1 change column value value varchar(20) +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alter_varchar_1 +POSTHOOK: Output: default@alter_varchar_1 +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- column values should be full size again +select * from alter_varchar_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_varchar_1 +#### A masked pattern was here #### +POSTHOOK: query: -- column values should be full size again +select * from alter_varchar_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_varchar_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 +86 val_86 +311 val_311 +27 val_27 +165 val_165 +PREHOOK: query: -- add varchar column +alter table 
alter_varchar_1 add columns (key2 int, value2 varchar(10)) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@alter_varchar_1 +PREHOOK: Output: default@alter_varchar_1 +POSTHOOK: query: -- add varchar column +alter table alter_varchar_1 add columns (key2 int, value2 varchar(10)) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@alter_varchar_1 +POSTHOOK: Output: default@alter_varchar_1 +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from alter_varchar_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_varchar_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_varchar_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_varchar_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 NULL NULL +86 val_86 NULL NULL +311 val_311 NULL NULL +27 val_27 NULL NULL +165 val_165 NULL NULL +PREHOOK: query: insert overwrite table alter_varchar_1 + select key, value, key, value from src limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@alter_varchar_1 +POSTHOOK: query: insert overwrite table alter_varchar_1 + select key, value, key, value from src limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@alter_varchar_1 +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from alter_varchar_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_varchar_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_varchar_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_varchar_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 238 val_238 +86 val_86 86 val_86 +311 val_311 311 val_311 +27 val_27 27 val_27 +165 val_165 165 val_165 +PREHOOK: query: drop table alter_varchar_1 +PREHOOK: type: DROPTABLE 
+PREHOOK: Input: default@alter_varchar_1 +PREHOOK: Output: default@alter_varchar_1 +POSTHOOK: query: drop table alter_varchar_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@alter_varchar_1 +POSTHOOK: Output: default@alter_varchar_1 +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_varchar_1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/ctas_varchar.q.out ql/src/test/results/clientpositive/ctas_varchar.q.out new file mode 100644 index 0000000..a4a9c66 --- /dev/null +++ ql/src/test/results/clientpositive/ctas_varchar.q.out @@ -0,0 +1,126 @@ +PREHOOK: query: drop table ctas_varchar_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ctas_varchar_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table ctas_varchar_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ctas_varchar_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop view ctas_varchar_3 +PREHOOK: type: DROPVIEW +POSTHOOK: query: drop view ctas_varchar_3 +POSTHOOK: type: DROPVIEW +PREHOOK: query: create table ctas_varchar_1 (key varchar(10), value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table ctas_varchar_1 (key varchar(10), value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@ctas_varchar_1 +PREHOOK: query: insert overwrite table ctas_varchar_1 + select key, value from src sort by key, value limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@ctas_varchar_1 +POSTHOOK: query: insert overwrite table ctas_varchar_1 + select key, value from src sort by key, value limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@ctas_varchar_1 +POSTHOOK: Lineage: ctas_varchar_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- create table as with varchar column +create table ctas_varchar_2 as select key, value from ctas_varchar_1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@ctas_varchar_1 +POSTHOOK: query: -- create table as with varchar column +create table ctas_varchar_2 as select key, value from ctas_varchar_1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@ctas_varchar_1 +POSTHOOK: Output: default@ctas_varchar_2 +POSTHOOK: Lineage: ctas_varchar_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- view with varchar column +create view ctas_varchar_3 as select key, value from ctas_varchar_2 +PREHOOK: type: CREATEVIEW +POSTHOOK: query: -- view with varchar column +create view ctas_varchar_3 as select key, value from ctas_varchar_2 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Output: default@ctas_varchar_3 +POSTHOOK: Lineage: ctas_varchar_1.key 
EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, value from ctas_varchar_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@ctas_varchar_1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from ctas_varchar_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ctas_varchar_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: ctas_varchar_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: select * from ctas_varchar_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@ctas_varchar_2 +#### A masked pattern was here #### +POSTHOOK: query: select * from ctas_varchar_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ctas_varchar_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: ctas_varchar_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: select * from ctas_varchar_3 +PREHOOK: type: QUERY +PREHOOK: Input: default@ctas_varchar_2 +PREHOOK: Input: default@ctas_varchar_3 +#### A masked pattern was here #### +POSTHOOK: query: select * from ctas_varchar_3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ctas_varchar_2 +POSTHOOK: Input: default@ctas_varchar_3 +#### A masked pattern was here #### +POSTHOOK: Lineage: ctas_varchar_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: drop table ctas_varchar_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ctas_varchar_1 +PREHOOK: Output: default@ctas_varchar_1 +POSTHOOK: query: drop table ctas_varchar_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ctas_varchar_1 +POSTHOOK: Output: default@ctas_varchar_1 +POSTHOOK: Lineage: ctas_varchar_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: drop table ctas_varchar_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ctas_varchar_2 +PREHOOK: Output: default@ctas_varchar_2 +POSTHOOK: query: drop table ctas_varchar_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ctas_varchar_2 +POSTHOOK: Output: default@ctas_varchar_2 +POSTHOOK: Lineage: ctas_varchar_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_varchar_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: drop view ctas_varchar_3 +PREHOOK: type: DROPVIEW +PREHOOK: Input: default@ctas_varchar_3 +PREHOOK: Output: default@ctas_varchar_3 +POSTHOOK: query: drop view ctas_varchar_3 +POSTHOOK: type: DROPVIEW +POSTHOOK: Input: default@ctas_varchar_3 +POSTHOOK: Output: default@ctas_varchar_3 +POSTHOOK: Lineage: ctas_varchar_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_varchar_1.value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/partition_varchar1.q.out ql/src/test/results/clientpositive/partition_varchar1.q.out new file mode 100644 index 0000000..e7fef01 --- /dev/null +++ ql/src/test/results/clientpositive/partition_varchar1.q.out @@ -0,0 +1,369 @@ +PREHOOK: query: drop table partition_varchar_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table partition_varchar_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table partition_varchar_1 (key string, value varchar(20)) partitioned by (dt varchar(10), region int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table partition_varchar_1 (key string, value varchar(20)) partitioned by (dt varchar(10), region int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@partition_varchar_1 +PREHOOK: query: insert overwrite table partition_varchar_1 partition(dt='2000-01-01', region=1) + select * from src limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partition_varchar_1@dt=2000-01-01/region=1 +POSTHOOK: query: insert overwrite table partition_varchar_1 partition(dt='2000-01-01', region=1) + select * from src limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partition_varchar_1@dt=2000-01-01/region=1 +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table partition_varchar_1 partition(dt='2000-01-01', region=2) + select * from src limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partition_varchar_1@dt=2000-01-01/region=2 +POSTHOOK: query: insert overwrite table partition_varchar_1 partition(dt='2000-01-01', region=2) + select * from src limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partition_varchar_1@dt=2000-01-01/region=2 +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table partition_varchar_1 partition(dt='2013-08-08', region=1) + select * from src limit 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partition_varchar_1@dt=2013-08-08/region=1 +POSTHOOK: query: insert overwrite table partition_varchar_1 partition(dt='2013-08-08', region=1) + select * from src limit 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partition_varchar_1@dt=2013-08-08/region=1 +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table partition_varchar_1 partition(dt='2013-08-08', region=10) + select * from src limit 11 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@partition_varchar_1@dt=2013-08-08/region=10 +POSTHOOK: query: insert overwrite table partition_varchar_1 partition(dt='2013-08-08', region=10) + select * from src limit 11 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@partition_varchar_1@dt=2013-08-08/region=10 +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select distinct dt from partition_varchar_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_varchar_1 +PREHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=1 +PREHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=2 +PREHOOK: Input: default@partition_varchar_1@dt=2013-08-08/region=1 +PREHOOK: Input: default@partition_varchar_1@dt=2013-08-08/region=10 +#### A masked pattern was here #### +POSTHOOK: query: select distinct dt from partition_varchar_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_varchar_1 +POSTHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=1 +POSTHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=2 +POSTHOOK: Input: default@partition_varchar_1@dt=2013-08-08/region=1 +POSTHOOK: Input: default@partition_varchar_1@dt=2013-08-08/region=10 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +2000-01-01 +2013-08-08 +PREHOOK: query: select * from partition_varchar_1 where dt = '2000-01-01' and region = 2 order by key,value +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_varchar_1 +PREHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=2 +#### A masked pattern was here #### +POSTHOOK: query: select * from partition_varchar_1 where dt = '2000-01-01' and region = 2 order by key,value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_varchar_1 +POSTHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=2 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +165 val_165 2000-01-01 2 +238 val_238 2000-01-01 2 +27 val_27 2000-01-01 2 +311 val_311 2000-01-01 2 +86 val_86 2000-01-01 2 +PREHOOK: query: -- 15 +select count(*) from partition_varchar_1 where dt = '2000-01-01' +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_varchar_1 +PREHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=1 +PREHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=2 +#### A masked pattern was here #### +POSTHOOK: query: -- 15 +select count(*) from partition_varchar_1 where dt = '2000-01-01' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_varchar_1 +POSTHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=1 +POSTHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=2 +#### A masked pattern was here 
#### +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +15 +PREHOOK: query: -- 5 +select count(*) from partition_varchar_1 where dt = '2000-01-01' and region = 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_varchar_1 +PREHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=2 +#### A masked pattern was here #### +POSTHOOK: query: -- 5 +select count(*) from partition_varchar_1 where dt = '2000-01-01' and region = 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_varchar_1 +POSTHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=2 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +5 +PREHOOK: query: -- 11 +select count(*) from partition_varchar_1 where dt = '2013-08-08' and region = 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_varchar_1 +PREHOOK: Input: default@partition_varchar_1@dt=2013-08-08/region=10 +#### A masked pattern was here #### +POSTHOOK: query: -- 11 +select count(*) from partition_varchar_1 where dt = '2013-08-08' and region = 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_varchar_1 +POSTHOOK: Input: 
default@partition_varchar_1@dt=2013-08-08/region=10 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +11 +PREHOOK: query: -- 30 +select count(*) from partition_varchar_1 where region = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_varchar_1 +PREHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=1 +PREHOOK: Input: default@partition_varchar_1@dt=2013-08-08/region=1 +#### A masked pattern was here #### +POSTHOOK: query: -- 30 +select count(*) from partition_varchar_1 where region = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_varchar_1 +POSTHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=1 +POSTHOOK: Input: default@partition_varchar_1@dt=2013-08-08/region=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +30 +PREHOOK: query: -- 0 +select count(*) from partition_varchar_1 where dt = '2000-01-01' and region = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_varchar_1 +#### A masked pattern was here #### +POSTHOOK: query: -- 0 +select count(*) from partition_varchar_1 where dt = '2000-01-01' and region = 3 +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@partition_varchar_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 +PREHOOK: query: -- 0 +select count(*) from partition_varchar_1 where dt = '1999-01-01' +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_varchar_1 +#### A masked pattern was here #### +POSTHOOK: query: -- 0 +select count(*) from partition_varchar_1 where dt = '1999-01-01' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_varchar_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 +PREHOOK: query: -- Try other comparison operations + +-- 20 +select count(*) from partition_varchar_1 where dt > '2000-01-01' and region = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_varchar_1 +PREHOOK: Input: default@partition_varchar_1@dt=2013-08-08/region=1 +#### A masked pattern was here #### +POSTHOOK: query: -- Try other comparison operations + +-- 20 +select count(*) from partition_varchar_1 where dt > '2000-01-01' and region = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_varchar_1 +POSTHOOK: Input: default@partition_varchar_1@dt=2013-08-08/region=1 +#### A masked 
pattern was here #### +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +20 +PREHOOK: query: -- 10 +select count(*) from partition_varchar_1 where dt < '2000-01-02' and region = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_varchar_1 +PREHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=1 +#### A masked pattern was here #### +POSTHOOK: query: -- 10 +select count(*) from partition_varchar_1 where dt < '2000-01-02' and region = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_varchar_1 +POSTHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +10 +PREHOOK: query: -- 20 +select count(*) from partition_varchar_1 where dt >= '2000-01-02' and region = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_varchar_1 +PREHOOK: Input: default@partition_varchar_1@dt=2013-08-08/region=1 +#### A masked pattern was here #### +POSTHOOK: query: -- 20 +select count(*) from partition_varchar_1 where dt >= '2000-01-02' and region = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_varchar_1 +POSTHOOK: Input: 
default@partition_varchar_1@dt=2013-08-08/region=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +20 +PREHOOK: query: -- 10 +select count(*) from partition_varchar_1 where dt <= '2000-01-01' and region = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_varchar_1 +PREHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=1 +#### A masked pattern was here #### +POSTHOOK: query: -- 10 +select count(*) from partition_varchar_1 where dt <= '2000-01-01' and region = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@partition_varchar_1 +POSTHOOK: Input: default@partition_varchar_1@dt=2000-01-01/region=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +10 +PREHOOK: query: -- 20 +select count(*) from partition_varchar_1 where dt <> '2000-01-01' and region = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@partition_varchar_1 +PREHOOK: Input: default@partition_varchar_1@dt=2013-08-08/region=1 +#### A masked pattern was here #### +POSTHOOK: query: -- 20 +select count(*) from partition_varchar_1 where dt <> '2000-01-01' and region = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@partition_varchar_1 +POSTHOOK: Input: default@partition_varchar_1@dt=2013-08-08/region=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +20 +PREHOOK: query: drop table partition_varchar_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@partition_varchar_1 +PREHOOK: Output: default@partition_varchar_1 +POSTHOOK: query: drop table partition_varchar_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@partition_varchar_1 +POSTHOOK: Output: default@partition_varchar_1 +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2000-01-01,region=2).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=10).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: partition_varchar_1 PARTITION(dt=2013-08-08,region=1).value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/varchar_1.q.out ql/src/test/results/clientpositive/varchar_1.q.out new file mode 100644 index 0000000..18d0b8f --- /dev/null +++ ql/src/test/results/clientpositive/varchar_1.q.out @@ -0,0 +1,196 @@ +PREHOOK: query: drop table varchar1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar1_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar1_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar1 (key varchar(10), value varchar(20)) +PREHOOK: type: CREATETABLE 
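The partition_varchar_1 output above turns on plain lexicographic comparison of the varchar partition values: predicates such as dt > '2000-01-01' select only the dt=2013-08-08 partitions because ISO-formatted dates order chronologically as strings. A minimal sketch of that ordering, using the HiveVarchar class this patch adds; the class name PartitionOrderSketch and the standalone main are illustrative only, not part of the patch:

import org.apache.hadoop.hive.common.type.HiveVarchar;

public class PartitionOrderSketch {
  public static void main(String[] args) {
    // HiveVarchar.compareTo delegates to String.compareTo on the
    // underlying value, so '2013-08-08' sorts after '2000-01-01'.
    HiveVarchar dt2000 = new HiveVarchar("2000-01-01", 20);
    HiveVarchar dt2013 = new HiveVarchar("2013-08-08", 20);
    System.out.println(dt2013.compareTo(dt2000) > 0); // true
    System.out.println(dt2000.compareTo(dt2013) < 0); // true
  }
}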
+POSTHOOK: query: create table varchar1 (key varchar(10), value varchar(20)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar1 +PREHOOK: query: create table varchar1_1 (key string, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar1_1 (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar1_1 +PREHOOK: query: -- load from file +load data local inpath '../data/files/srcbucket0.txt' overwrite into table varchar1 +PREHOOK: type: LOAD +PREHOOK: Output: default@varchar1 +POSTHOOK: query: -- load from file +load data local inpath '../data/files/srcbucket0.txt' overwrite into table varchar1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@varchar1 +PREHOOK: query: select * from varchar1 limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar1 +#### A masked pattern was here #### +POSTHOOK: query: select * from varchar1 limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar1 +#### A masked pattern was here #### +474 val_475 +62 val_63 +PREHOOK: query: -- insert overwrite, from same/different length varchar +insert overwrite table varchar1 + select cast(key as varchar(10)), cast(value as varchar(15)) from src limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar1 +POSTHOOK: query: -- insert overwrite, from same/different length varchar +insert overwrite table varchar1 + select cast(key as varchar(10)), cast(value as varchar(15)) from src limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar1 +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, value from varchar1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from varchar1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 +86 val_86 +PREHOOK: query: -- insert overwrite, from string +insert overwrite table varchar1 + select key, value from src limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar1 +POSTHOOK: query: -- insert overwrite, from string +insert overwrite table varchar1 + select key, value from src limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar1 +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, value from varchar1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from varchar1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar1.key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 +86 val_86 +PREHOOK: query: -- insert string from varchar +insert overwrite table varchar1_1 + select key, value from varchar1 limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar1 +PREHOOK: Output: default@varchar1_1 +POSTHOOK: query: -- insert string from varchar +insert overwrite table varchar1_1 + select key, value from varchar1 limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar1 +POSTHOOK: Output: default@varchar1_1 +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1_1.key SIMPLE [(varchar1)varchar1.FieldSchema(name:key, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: varchar1_1.value SIMPLE [(varchar1)varchar1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: select key, value from varchar1_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar1_1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from varchar1_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar1_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1_1.key SIMPLE [(varchar1)varchar1.FieldSchema(name:key, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: varchar1_1.value SIMPLE [(varchar1)varchar1.FieldSchema(name:value, type:varchar(20), comment:null), ] +238 val_238 +86 val_86 +PREHOOK: query: -- respect string length +insert overwrite table varchar1 + select key, cast(value as varchar(3)) from src limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar1 +POSTHOOK: query: -- respect string length +insert overwrite table varchar1 + select key, cast(value as varchar(3)) from src limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar1 +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] 
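The "respect string length" insert above casts value to varchar(3), and the select further below returns "238 val" and "86 val": each value is cut down to its first three characters. A minimal sketch of the same truncation through the public HiveVarchar API; TruncationSketch is an illustrative name, not part of the patch. The enforcement itself lives in HiveBaseChar.enforceMaxLength, which counts code points so supplementary characters are never split.

import org.apache.hadoop.hive.common.type.HiveVarchar;

public class TruncationSketch {
  public static void main(String[] args) {
    // The constructor applies the max length, so "val_238" is
    // truncated to its first 3 code points, mirroring
    // cast(value as varchar(3)) in the test above.
    HiveVarchar vc = new HiveVarchar("val_238", 3);
    System.out.println(vc.getValue());           // val
    System.out.println(vc.getCharacterLength()); // 3
  }
}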
+POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1_1.key SIMPLE [(varchar1)varchar1.FieldSchema(name:key, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: varchar1_1.value SIMPLE [(varchar1)varchar1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: select key, value from varchar1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from varchar1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1_1.key SIMPLE [(varchar1)varchar1.FieldSchema(name:key, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: varchar1_1.value SIMPLE [(varchar1)varchar1.FieldSchema(name:value, type:varchar(20), comment:null), ] +238 val +86 val +PREHOOK: query: drop table varchar1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar1 +PREHOOK: Output: default@varchar1 +POSTHOOK: query: drop table varchar1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar1 +POSTHOOK: Output: default@varchar1 +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1_1.key SIMPLE [(varchar1)varchar1.FieldSchema(name:key, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: varchar1_1.value SIMPLE [(varchar1)varchar1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: drop table varchar1_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar1_1 +PREHOOK: Output: default@varchar1_1 +POSTHOOK: query: drop table varchar1_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar1_1 +POSTHOOK: Output: default@varchar1_1 +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, 
type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1_1.key SIMPLE [(varchar1)varchar1.FieldSchema(name:key, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: varchar1_1.value SIMPLE [(varchar1)varchar1.FieldSchema(name:value, type:varchar(20), comment:null), ] diff --git ql/src/test/results/clientpositive/varchar_2.q.out ql/src/test/results/clientpositive/varchar_2.q.out new file mode 100644 index 0000000..50ab30f --- /dev/null +++ ql/src/test/results/clientpositive/varchar_2.q.out @@ -0,0 +1,131 @@ +PREHOOK: query: drop table varchar_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_2 ( + key varchar(10), + value varchar(20) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_2 ( + key varchar(10), + value varchar(20) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_2 +PREHOOK: query: insert overwrite table varchar_2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar_2 +POSTHOOK: query: insert overwrite table varchar_2 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar_2 +POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_0 0 3 +val_10 10 1 +val_100 200 2 +val_103 206 2 +val_104 208 2 +PREHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from varchar_2 +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from varchar_2 +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_0 0 3 +val_10 10 1 +val_100 200 2 +val_103 206 2 +val_104 208 2 +PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: 
Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from varchar_2 +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from varchar_2 +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: drop table varchar_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_2 +PREHOOK: Output: default@varchar_2 +POSTHOOK: query: drop table varchar_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_2 +POSTHOOK: Output: default@varchar_2 +POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/varchar_cast.q.out ql/src/test/results/clientpositive/varchar_cast.q.out new file mode 100644 index 0000000..5a968f2 --- /dev/null +++ ql/src/test/results/clientpositive/varchar_cast.q.out @@ -0,0 +1,226 @@ +PREHOOK: query: -- Cast from varchar to other data types +select + cast(cast('11' as string) as tinyint), + cast(cast('11' as string) as smallint), + cast(cast('11' as string) as int), + cast(cast('11' as string) as bigint), + cast(cast('11.00' as string) as float), + cast(cast('11.00' as string) as double), + cast(cast('11.00' as string) as decimal) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Cast from varchar to other data types +select + cast(cast('11' as string) as tinyint), + cast(cast('11' as string) as smallint), + cast(cast('11' as string) as int), + cast(cast('11' as string) as bigint), + cast(cast('11.00' as string) as float), + cast(cast('11.00' as string) as double), + cast(cast('11.00' as string) as decimal) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +11 11 11 11 11.0 11.0 11 +PREHOOK: query: select + cast(cast('11' as varchar(10)) as tinyint), + cast(cast('11' as varchar(10)) as smallint), + cast(cast('11' as varchar(10)) as int), + cast(cast('11' as varchar(10)) as bigint), + cast(cast('11.00' as varchar(10)) as float), + cast(cast('11.00' as varchar(10)) as double), + cast(cast('11.00' as varchar(10)) as decimal) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(cast('11' as varchar(10)) as tinyint), + cast(cast('11' as varchar(10)) as smallint), + cast(cast('11' as varchar(10)) as int), + cast(cast('11' as varchar(10)) as bigint), + cast(cast('11.00' as varchar(10)) as float), + cast(cast('11.00' as varchar(10)) as double), 
+ cast(cast('11.00' as varchar(10)) as decimal) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +11 11 11 11 11.0 11.0 11 +PREHOOK: query: select + cast(cast('2011-01-01' as string) as date), + cast(cast('2011-01-01 01:02:03' as string) as timestamp) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(cast('2011-01-01' as string) as date), + cast(cast('2011-01-01 01:02:03' as string) as timestamp) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2011-01-01 2011-01-01 01:02:03 +PREHOOK: query: select + cast(cast('2011-01-01' as varchar(10)) as date), + cast(cast('2011-01-01 01:02:03' as varchar(30)) as timestamp) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(cast('2011-01-01' as varchar(10)) as date), + cast(cast('2011-01-01 01:02:03' as varchar(30)) as timestamp) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2011-01-01 2011-01-01 01:02:03 +PREHOOK: query: -- no tests from string/varchar to boolean, that conversion doesn't look useful +select + cast(cast('abc123' as string) as string), + cast(cast('abc123' as string) as varchar(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- no tests from string/varchar to boolean, that conversion doesn't look useful +select + cast(cast('abc123' as string) as string), + cast(cast('abc123' as string) as varchar(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +abc123 abc123 +PREHOOK: query: select + cast(cast('abc123' as varchar(10)) as string), + cast(cast('abc123' as varchar(10)) as varchar(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(cast('abc123' as varchar(10)) as string), + cast(cast('abc123' as varchar(10)) as varchar(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +abc123 abc123 +PREHOOK: query: -- cast from other types to varchar +select + cast(cast(11 as tinyint) as string), + cast(cast(11 as smallint) as string), + cast(cast(11 as int) as string), + cast(cast(11 as bigint) as string), + cast(cast(11.00 as float) as string), + cast(cast(11.00 as double) as string), + cast(cast(11.00 as decimal) as string) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- cast from other types to varchar +select + cast(cast(11 as tinyint) as string), + cast(cast(11 as smallint) as string), + cast(cast(11 as int) as string), + cast(cast(11 as bigint) as string), + cast(cast(11.00 as float) as string), + cast(cast(11.00 as double) as string), + cast(cast(11.00 as decimal) as string) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +11 11 11 11 11.0 11.0 11 +PREHOOK: query: select + cast(cast(11 as tinyint) as varchar(10)), + cast(cast(11 as smallint) as varchar(10)), + cast(cast(11 as int) as varchar(10)), + cast(cast(11 as bigint) as varchar(10)), + cast(cast(11.00 as float) as varchar(10)), + cast(cast(11.00 as double) as varchar(10)), + cast(cast(11.00 as decimal) as 
varchar(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(cast(11 as tinyint) as varchar(10)), + cast(cast(11 as smallint) as varchar(10)), + cast(cast(11 as int) as varchar(10)), + cast(cast(11 as bigint) as varchar(10)), + cast(cast(11.00 as float) as varchar(10)), + cast(cast(11.00 as double) as varchar(10)), + cast(cast(11.00 as decimal) as varchar(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +11 11 11 11 11.0 11.0 11 +PREHOOK: query: select + cast(date '2011-01-01' as string), + cast(timestamp('2011-01-01 01:02:03') as string) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(date '2011-01-01' as string), + cast(timestamp('2011-01-01 01:02:03') as string) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2011-01-01 2011-01-01 01:02:03 +PREHOOK: query: select + cast(date '2011-01-01' as varchar(10)), + cast(timestamp('2011-01-01 01:02:03') as varchar(30)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(date '2011-01-01' as varchar(10)), + cast(timestamp('2011-01-01 01:02:03') as varchar(30)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2011-01-01 2011-01-01 01:02:03 +PREHOOK: query: select + cast(true as string), + cast(false as string) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(true as string), + cast(false as string) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +TRUE FALSE +PREHOOK: query: select + cast(true as varchar(10)), + cast(false as varchar(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(true as varchar(10)), + cast(false as varchar(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +TRUE FALSE diff --git ql/src/test/results/clientpositive/varchar_comparison.q.out ql/src/test/results/clientpositive/varchar_comparison.q.out new file mode 100644 index 0000000..e2c7aaf --- /dev/null +++ ql/src/test/results/clientpositive/varchar_comparison.q.out @@ -0,0 +1,105 @@ +PREHOOK: query: -- Should all be true +select + cast('abc' as varchar(10)) = cast('abc' as varchar(10)), + cast('abc' as varchar(10)) <= cast('abc' as varchar(10)), + cast('abc' as varchar(10)) >= cast('abc' as varchar(10)), + cast('abc' as varchar(10)) < cast('abd' as varchar(10)), + cast('abc' as varchar(10)) > cast('abb' as varchar(10)), + cast('abc' as varchar(10)) <> cast('abb' as varchar(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Should all be true +select + cast('abc' as varchar(10)) = cast('abc' as varchar(10)), + cast('abc' as varchar(10)) <= cast('abc' as varchar(10)), + cast('abc' as varchar(10)) >= cast('abc' as varchar(10)), + cast('abc' as varchar(10)) < cast('abd' as varchar(10)), + cast('abc' as varchar(10)) > cast('abb' as varchar(10)), + cast('abc' as varchar(10)) <> cast('abb' as varchar(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@src +#### A masked pattern was here #### +true true true true true true +PREHOOK: query: -- Different varchar lengths should still compare the same +select + cast('abc' as varchar(10)) = cast('abc' as varchar(3)), + cast('abc' as varchar(10)) <= cast('abc' as varchar(3)), + cast('abc' as varchar(10)) >= cast('abc' as varchar(3)), + cast('abc' as varchar(10)) < cast('abd' as varchar(3)), + cast('abc' as varchar(10)) > cast('abb' as varchar(3)), + cast('abc' as varchar(10)) <> cast('abb' as varchar(3)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Different varchar lengths should still compare the same +select + cast('abc' as varchar(10)) = cast('abc' as varchar(3)), + cast('abc' as varchar(10)) <= cast('abc' as varchar(3)), + cast('abc' as varchar(10)) >= cast('abc' as varchar(3)), + cast('abc' as varchar(10)) < cast('abd' as varchar(3)), + cast('abc' as varchar(10)) > cast('abb' as varchar(3)), + cast('abc' as varchar(10)) <> cast('abb' as varchar(3)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true true true true true true +PREHOOK: query: -- Should work with string types as well +select + cast('abc' as varchar(10)) = 'abc', + cast('abc' as varchar(10)) <= 'abc', + cast('abc' as varchar(10)) >= 'abc', + cast('abc' as varchar(10)) < 'abd', + cast('abc' as varchar(10)) > 'abb', + cast('abc' as varchar(10)) <> 'abb' +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Should work with string types as well +select + cast('abc' as varchar(10)) = 'abc', + cast('abc' as varchar(10)) <= 'abc', + cast('abc' as varchar(10)) >= 'abc', + cast('abc' as varchar(10)) < 'abd', + cast('abc' as varchar(10)) > 'abb', + cast('abc' as varchar(10)) <> 'abb' +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true true true true true true +PREHOOK: query: -- leading space is significant for varchar +select + cast(' abc' as varchar(10)) <> cast('abc' as varchar(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- leading space is significant for varchar +select + cast(' abc' as varchar(10)) <> cast('abc' as varchar(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true +PREHOOK: query: -- trailing space is significant for varchar +select + cast('abc ' as varchar(10)) <> cast('abc' as varchar(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- trailing space is significant for varchar +select + cast('abc ' as varchar(10)) <> cast('abc' as varchar(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true diff --git ql/src/test/results/clientpositive/varchar_join1.q.out ql/src/test/results/clientpositive/varchar_join1.q.out new file mode 100644 index 0000000..b389de9 --- /dev/null +++ ql/src/test/results/clientpositive/varchar_join1.q.out @@ -0,0 +1,130 @@ +PREHOOK: query: drop table varchar_join1_vc1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_join1_vc1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_join1_vc2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_join1_vc2 +POSTHOOK: type: DROPTABLE +PREHOOK: 
query: drop table varchar_join1_str +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_join1_str +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_join1_vc1 ( + c1 int, + c2 varchar(10) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_join1_vc1 ( + c1 int, + c2 varchar(10) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_join1_vc1 +PREHOOK: query: create table varchar_join1_vc2 ( + c1 int, + c2 varchar(20) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_join1_vc2 ( + c1 int, + c2 varchar(20) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_join1_vc2 +PREHOOK: query: create table varchar_join1_str ( + c1 int, + c2 string +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_join1_str ( + c1 int, + c2 string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_join1_str +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_join1_vc1 +PREHOOK: type: LOAD +PREHOOK: Output: default@varchar_join1_vc1 +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_join1_vc1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@varchar_join1_vc1 +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_join1_vc2 +PREHOOK: type: LOAD +PREHOOK: Output: default@varchar_join1_vc2 +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_join1_vc2 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@varchar_join1_vc2 +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_join1_str +PREHOOK: type: LOAD +PREHOOK: Output: default@varchar_join1_str +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_join1_str +POSTHOOK: type: LOAD +POSTHOOK: Output: default@varchar_join1_str +PREHOOK: query: -- Join varchar with same length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc1 b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_join1_vc1 +#### A masked pattern was here #### +POSTHOOK: query: -- Join varchar with same length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc1 b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_join1_vc1 +#### A masked pattern was here #### +1 abc 1 abc +2 abc 2 abc +3 abc 3 abc +PREHOOK: query: -- Join varchar with different length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc2 b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_join1_vc1 +PREHOOK: Input: default@varchar_join1_vc2 +#### A masked pattern was here #### +POSTHOOK: query: -- Join varchar with different length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc2 b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_join1_vc1 +POSTHOOK: Input: default@varchar_join1_vc2 +#### A masked pattern was here #### +1 abc 1 abc +2 abc 2 abc +3 abc 3 abc +PREHOOK: query: -- Join varchar with string +select * from varchar_join1_vc1 a join varchar_join1_str b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_join1_str +PREHOOK: Input: default@varchar_join1_vc1 +#### A masked pattern was here #### +POSTHOOK: query: -- Join varchar with string +select * from varchar_join1_vc1 a join varchar_join1_str b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_join1_str +POSTHOOK: 
Input: default@varchar_join1_vc1 +#### A masked pattern was here #### +1 abc 1 abc +2 abc 2 abc +3 abc 3 abc +PREHOOK: query: drop table varchar_join1_vc1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_join1_vc1 +PREHOOK: Output: default@varchar_join1_vc1 +POSTHOOK: query: drop table varchar_join1_vc1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_join1_vc1 +POSTHOOK: Output: default@varchar_join1_vc1 +PREHOOK: query: drop table varchar_join1_vc2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_join1_vc2 +PREHOOK: Output: default@varchar_join1_vc2 +POSTHOOK: query: drop table varchar_join1_vc2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_join1_vc2 +POSTHOOK: Output: default@varchar_join1_vc2 +PREHOOK: query: drop table varchar_join1_str +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_join1_str +PREHOOK: Output: default@varchar_join1_str +POSTHOOK: query: drop table varchar_join1_str +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_join1_str +POSTHOOK: Output: default@varchar_join1_str diff --git ql/src/test/results/clientpositive/varchar_nested_types.q.out ql/src/test/results/clientpositive/varchar_nested_types.q.out new file mode 100644 index 0000000..19eb118 --- /dev/null +++ ql/src/test/results/clientpositive/varchar_nested_types.q.out @@ -0,0 +1,363 @@ +PREHOOK: query: drop table varchar_nested_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_nested_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_nested_array +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_nested_array +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_nested_map +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_nested_map +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_nested_struct +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_nested_struct +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_nested_cta +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_nested_cta +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_nested_view +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_nested_view +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_nested_1 (key int, value varchar(20)) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_nested_1 (key int, value varchar(20)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_nested_1 +PREHOOK: query: insert overwrite table varchar_nested_1 + select key, value from src limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar_nested_1 +POSTHOOK: query: insert overwrite table varchar_nested_1 + select key, value from src limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar_nested_1 +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- arrays +create table varchar_nested_array (c1 array<varchar(20)>) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- arrays +create table varchar_nested_array (c1 array<varchar(20)>) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_nested_array +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table varchar_nested_array + select array(value, value) from varchar_nested_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_nested_1 +PREHOOK: Output: default@varchar_nested_array +POSTHOOK: query: insert overwrite table varchar_nested_array + select array(value, value) from varchar_nested_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_nested_1 +POSTHOOK: Output: default@varchar_nested_array +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: describe varchar_nested_array +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe varchar_nested_array +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +c1 array<varchar(20)> None +PREHOOK: query: select * from varchar_nested_array +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_nested_array +#### A masked pattern was here #### +POSTHOOK: query: select * from varchar_nested_array +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_nested_array +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +["val_238","val_238"] +["val_86","val_86"] +["val_311","val_311"] +["val_27","val_27"] +["val_165","val_165"] +PREHOOK: query: -- maps +create table varchar_nested_map (c1 map<int,varchar(20)>) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- maps +create table varchar_nested_map (c1 map<int,varchar(20)>) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_nested_map +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: insert overwrite table varchar_nested_map + select map(key, value) from varchar_nested_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_nested_1 +PREHOOK: Output: default@varchar_nested_map +POSTHOOK: query: insert overwrite table varchar_nested_map + select map(key, value) from varchar_nested_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_nested_1 +POSTHOOK: Output: default@varchar_nested_map +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string,
comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: describe varchar_nested_map +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe varchar_nested_map +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +c1 map<int,varchar(20)> None +PREHOOK: query: select * from varchar_nested_map +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_nested_map +#### A masked pattern was here #### +POSTHOOK: query: select * from varchar_nested_map +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_nested_map +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +{238:"val_238"} +{86:"val_86"} +{311:"val_311"} +{27:"val_27"} +{165:"val_165"} +PREHOOK: query: -- structs +create table varchar_nested_struct (c1 struct<a:int, b:varchar(20), c:string>) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- structs +create table varchar_nested_struct (c1 struct<a:int, b:varchar(20), c:string>) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_nested_struct +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: insert overwrite table varchar_nested_struct + select named_struct('a', key, + 'b', value, + 'c', cast(value as string)) + from varchar_nested_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_nested_1 +PREHOOK: Output: default@varchar_nested_struct +POSTHOOK: query: insert overwrite table varchar_nested_struct + select named_struct('a', key, + 'b', value, + 'c', cast(value as string)) + from varchar_nested_1 +POSTHOOK: type: QUERY +POSTHOOK: Input:
default@varchar_nested_1 +POSTHOOK: Output: default@varchar_nested_struct +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: describe varchar_nested_struct +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe varchar_nested_struct +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +c1 struct<a:int,b:varchar(20),c:string> None +PREHOOK: query: select * from varchar_nested_struct +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_nested_struct +#### A masked pattern was here #### +POSTHOOK: query: select * from varchar_nested_struct +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_nested_struct +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +{"a":238,"b":"val_238","c":"val_238"} +{"a":86,"b":"val_86","c":"val_86"} +{"a":311,"b":"val_311","c":"val_311"} +{"a":27,"b":"val_27","c":"val_27"} +{"a":165,"b":"val_165","c":"val_165"} +PREHOOK: query: -- nested type with create table as +create table varchar_nested_cta as + select * from varchar_nested_struct +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@varchar_nested_struct +POSTHOOK: query: -- nested type with
create table as +create table varchar_nested_cta as + select * from varchar_nested_struct +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@varchar_nested_struct +POSTHOOK: Output: default@varchar_nested_cta +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: describe varchar_nested_cta +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe varchar_nested_cta +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +c1 struct<a:int,b:varchar(20),c:string> None +PREHOOK: query: select * from varchar_nested_cta +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_nested_cta +#### A masked pattern was here #### +POSTHOOK: query: select * from varchar_nested_cta +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_nested_cta +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +{"a":238,"b":"val_238","c":"val_238"} +{"a":86,"b":"val_86","c":"val_86"} +{"a":311,"b":"val_311","c":"val_311"} +{"a":27,"b":"val_27","c":"val_27"} +{"a":165,"b":"val_165","c":"val_165"} +PREHOOK: query: -- nested type with view +create table varchar_nested_view as + select * from varchar_nested_struct
+PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@varchar_nested_struct +POSTHOOK: query: -- nested type with view +create table varchar_nested_view as + select * from varchar_nested_struct +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@varchar_nested_struct +POSTHOOK: Output: default@varchar_nested_view +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: describe varchar_nested_view +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe varchar_nested_view +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +c1 struct<a:int,b:varchar(20),c:string> None +PREHOOK: query: select * from varchar_nested_view +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_nested_view +#### A masked pattern was here #### +POSTHOOK: query: select * from varchar_nested_view +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_nested_view +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +{"a":238,"b":"val_238","c":"val_238"} +{"a":86,"b":"val_86","c":"val_86"} +{"a":311,"b":"val_311","c":"val_311"} +{"a":27,"b":"val_27","c":"val_27"} +{"a":165,"b":"val_165","c":"val_165"}
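The nested-type golden output above asserts that the varchar length parameter survives inside array, map, and struct types across insert, describe, and create-table-as-select. A minimal HiveQL sketch of the same behavior (the table name vc_demo and the varchar(5) length are illustrative, not part of this patch; it assumes Hive's varchar semantics of truncating over-long values to the declared length on write):

drop table if exists vc_demo;
create table vc_demo (c1 array<varchar(5)>);
insert overwrite table vc_demo
  select array(value) from src limit 1;     -- 'val_238' should be stored truncated as 'val_2'
describe vc_demo;                           -- expected: c1  array<varchar(5)>
select c1[0], length(c1[0]) from vc_demo;   -- expected: val_2  5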
+PREHOOK: query: drop table varchar_nested_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_nested_1 +PREHOOK: Output: default@varchar_nested_1 +POSTHOOK: query: drop table varchar_nested_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_nested_1 +POSTHOOK: Output: default@varchar_nested_1 +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: drop table varchar_nested_array +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_nested_array +PREHOOK: Output: default@varchar_nested_array +POSTHOOK: query: drop table varchar_nested_array +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_nested_array +POSTHOOK: Output: default@varchar_nested_array +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: drop table varchar_nested_map +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_nested_map +PREHOOK: Output: default@varchar_nested_map +POSTHOOK: query: drop table varchar_nested_map +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_nested_map +POSTHOOK: Output: default@varchar_nested_map +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: 
drop table varchar_nested_struct +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_nested_struct +PREHOOK: Output: default@varchar_nested_struct +POSTHOOK: query: drop table varchar_nested_struct +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_nested_struct +POSTHOOK: Output: default@varchar_nested_struct +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: drop table varchar_nested_cta +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_nested_cta +PREHOOK: Output: default@varchar_nested_cta +POSTHOOK: query: drop table varchar_nested_cta +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_nested_cta +POSTHOOK: Output: default@varchar_nested_cta +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: drop table varchar_nested_view +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_nested_view +PREHOOK: Output: default@varchar_nested_view +POSTHOOK: query: drop table varchar_nested_view +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_nested_view +POSTHOOK: Output: default@varchar_nested_view +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] diff --git 
ql/src/test/results/clientpositive/varchar_udf1.q.out ql/src/test/results/clientpositive/varchar_udf1.q.out new file mode 100644 index 0000000..03d2a96 --- /dev/null +++ ql/src/test/results/clientpositive/varchar_udf1.q.out @@ -0,0 +1,554 @@ +PREHOOK: query: drop table varchar_udf_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_udf_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_udf_1 (c1 string, c2 string, c3 varchar(10), c4 varchar(20)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_udf_1 +PREHOOK: query: insert overwrite table varchar_udf_1 + select key, value, key, value from src limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar_udf_1 +POSTHOOK: query: insert overwrite table varchar_udf_1 + select key, value, key, value from src limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar_udf_1 +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- UDFs with varchar support +select + concat(c1, c2), + concat(c3, c4), + concat(c1, c2) = concat(c3, c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: -- UDFs with varchar support +select + concat(c1, c2), + concat(c3, c4), + concat(c1, c2) = concat(c3, c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238val_238 238val_238 true +PREHOOK: query: select + upper(c2), + upper(c4), + upper(c2) = upper(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + upper(c2), + upper(c4), + upper(c2) = upper(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +VAL_238 VAL_238 true +PREHOOK: query: select + lower(c2), + lower(c4), + lower(c2) = lower(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern 
was here #### +POSTHOOK: query: select + lower(c2), + lower(c4), + lower(c2) = lower(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 val_238 true +PREHOOK: query: -- Scalar UDFs +select + ascii(c2), + ascii(c4), + ascii(c2) = ascii(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: -- Scalar UDFs +select + ascii(c2), + ascii(c4), + ascii(c2) = ascii(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +118 118 true +PREHOOK: query: select + concat_ws('|', c1, c2), + concat_ws('|', c3, c4), + concat_ws('|', c1, c2) = concat_ws('|', c3, c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + concat_ws('|', c1, c2), + concat_ws('|', c3, c4), + concat_ws('|', c1, c2) = concat_ws('|', c3, c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238|val_238 238|val_238 true +PREHOOK: query: select + decode(encode(c2, 'US-ASCII'), 'US-ASCII'), + decode(encode(c4, 'US-ASCII'), 'US-ASCII'), + decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + decode(encode(c2, 'US-ASCII'), 'US-ASCII'), + decode(encode(c4, 'US-ASCII'), 'US-ASCII'), + decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, 
comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 val_238 true +PREHOOK: query: select + instr(c2, '_'), + instr(c4, '_'), + instr(c2, '_') = instr(c4, '_') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + instr(c2, '_'), + instr(c4, '_'), + instr(c2, '_') = instr(c4, '_') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +4 4 true +PREHOOK: query: select + length(c2), + length(c4), + length(c2) = length(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + length(c2), + length(c4), + length(c2) = length(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +7 7 true +PREHOOK: query: select + locate('a', 'abcdabcd', 3), + locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), + locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + locate('a', 'abcdabcd', 3), + locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), + locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +5 5 true +PREHOOK: query: select + lpad(c2, 15, ' '), + lpad(c4, 15, ' '), + lpad(c2, 15, ' ') = lpad(c4, 15, ' ') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + lpad(c2, 15, ' '), + lpad(c4, 15, ' '), + lpad(c2, 15, ' ') = lpad(c4, 15, ' ') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### 
+POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] + val_238 val_238 true +PREHOOK: query: select + ltrim(c2), + ltrim(c4), + ltrim(c2) = ltrim(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + ltrim(c2), + ltrim(c4), + ltrim(c2) = ltrim(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 val_238 true +PREHOOK: query: select + regexp(c2, 'val'), + regexp(c4, 'val'), + regexp(c2, 'val') = regexp(c4, 'val') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp(c2, 'val'), + regexp(c4, 'val'), + regexp(c2, 'val') = regexp(c4, 'val') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +true true true +PREHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 238 true +PREHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') 
+from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +replaced_238 replaced_238 true +PREHOOK: query: select + reverse(c2), + reverse(c4), + reverse(c2) = reverse(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + reverse(c2), + reverse(c4), + reverse(c2) = reverse(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +832_lav 832_lav true +PREHOOK: query: select + rpad(c2, 15, ' '), + rpad(c4, 15, ' '), + rpad(c2, 15, ' ') = rpad(c4, 15, ' ') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + rpad(c2, 15, ' '), + rpad(c4, 15, ' '), + rpad(c2, 15, ' ') = rpad(c4, 15, ' ') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 val_238 true +PREHOOK: query: select + rtrim(c2), + rtrim(c4), + rtrim(c2) = rtrim(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + rtrim(c2), + rtrim(c4), + rtrim(c2) = rtrim(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 val_238 true +PREHOOK: query: select + sentences('See spot run. See jane run.'), + sentences(cast('See spot run. See jane run.' as varchar(50))) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + sentences('See spot run. See jane run.'), + sentences(cast('See spot run. See jane run.' as varchar(50))) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +[["See","spot","run"],["See","jane","run"]] [["See","spot","run"],["See","jane","run"]] +PREHOOK: query: select + split(c2, '_'), + split(c4, '_') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + split(c2, '_'), + split(c4, '_') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +["val","238"] ["val","238"] +PREHOOK: query: select + str_to_map('a:1,b:2,c:3',',',':'), + str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + str_to_map('a:1,b:2,c:3',',',':'), + str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +{"b":"2","a":"1","c":"3"} {"b":"2","a":"1","c":"3"} +PREHOOK: query: select + substr(c2, 1, 3), + substr(c4, 1, 3), + substr(c2, 1, 3) = substr(c4, 1, 3) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + substr(c2, 1, 3), + substr(c4, 1, 3), + substr(c2, 1, 3) = substr(c4, 1, 3) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] 
+POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val val true +PREHOOK: query: select + trim(c2), + trim(c4), + trim(c2) = trim(c4) +from varchar_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + trim(c2), + trim(c4), + trim(c2) = trim(c4) +from varchar_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 val_238 true +PREHOOK: query: -- Aggregate Functions +select + compute_stats(c2, 16), + compute_stats(c4, 16) +from varchar_udf_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: -- Aggregate Functions +select + compute_stats(c2, 16), + compute_stats(c4, 16) +from varchar_udf_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +{"columntype":"String","maxlength":7,"avglength":6.6,"countnulls":0,"numdistinctvalues":7} {"columntype":"String","maxlength":7,"avglength":6.6,"countnulls":0,"numdistinctvalues":7} +PREHOOK: query: select + min(c2), + min(c4) +from varchar_udf_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + min(c2), + min(c4) +from varchar_udf_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_165 val_165 +PREHOOK: query: select + max(c2), + max(c4) +from varchar_udf_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + max(c2), + max(c4) +from varchar_udf_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 
SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_86 val_86 +PREHOOK: query: drop table varchar_udf_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_udf_1 +PREHOOK: Output: default@varchar_udf_1 +POSTHOOK: query: drop table varchar_udf_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_udf_1 +POSTHOOK: Output: default@varchar_udf_1 +POSTHOOK: Lineage: varchar_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/varchar_union1.q.out ql/src/test/results/clientpositive/varchar_union1.q.out new file mode 100644 index 0000000..8041b7b --- /dev/null +++ ql/src/test/results/clientpositive/varchar_union1.q.out @@ -0,0 +1,157 @@ +PREHOOK: query: drop table varchar_union1_vc1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_union1_vc1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_union1_vc2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_union1_vc2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_union1_str +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_union1_str +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_union1_vc1 ( + c1 int, + c2 varchar(10) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_union1_vc1 ( + c1 int, + c2 varchar(10) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_union1_vc1 +PREHOOK: query: create table varchar_union1_vc2 ( + c1 int, + c2 varchar(20) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_union1_vc2 ( + c1 int, + c2 varchar(20) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_union1_vc2 +PREHOOK: query: create table varchar_union1_str ( + c1 int, + c2 string +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_union1_str ( + c1 int, + c2 string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_union1_str +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_union1_vc1 +PREHOOK: type: LOAD +PREHOOK: Output: default@varchar_union1_vc1 +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_union1_vc1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@varchar_union1_vc1 +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_union1_vc2 +PREHOOK: type: LOAD +PREHOOK: Output: default@varchar_union1_vc2 +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_union1_vc2 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@varchar_union1_vc2 +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_union1_str +PREHOOK: type: LOAD +PREHOOK: Output: default@varchar_union1_str +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_union1_str +POSTHOOK: type: LOAD +POSTHOOK: Output: default@varchar_union1_str +PREHOOK: query: -- union 
varchar with same length varchar +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_vc1 limit 1 +) q1 sort by c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_union1_vc1 +#### A masked pattern was here #### +POSTHOOK: query: -- union varchar with same length varchar +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_vc1 limit 1 +) q1 sort by c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_union1_vc1 +#### A masked pattern was here #### +1 abc +1 abc +2 abc +3 abc +PREHOOK: query: -- union varchar with different length varchar +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_vc2 limit 1 +) q1 sort by c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_union1_vc1 +PREHOOK: Input: default@varchar_union1_vc2 +#### A masked pattern was here #### +POSTHOOK: query: -- union varchar with different length varchar +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_vc2 limit 1 +) q1 sort by c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_union1_vc1 +POSTHOOK: Input: default@varchar_union1_vc2 +#### A masked pattern was here #### +1 abc +1 abc +2 abc +3 abc +PREHOOK: query: -- union varchar with string +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_str limit 1 +) q1 sort by c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_union1_str +PREHOOK: Input: default@varchar_union1_vc1 +#### A masked pattern was here #### +POSTHOOK: query: -- union varchar with string +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_str limit 1 +) q1 sort by c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_union1_str +POSTHOOK: Input: default@varchar_union1_vc1 +#### A masked pattern was here #### +1 abc +1 abc +2 abc +3 abc +PREHOOK: query: drop table varchar_union1_vc1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_union1_vc1 +PREHOOK: Output: default@varchar_union1_vc1 +POSTHOOK: query: drop table varchar_union1_vc1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_union1_vc1 +POSTHOOK: Output: default@varchar_union1_vc1 +PREHOOK: query: drop table varchar_union1_vc2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_union1_vc2 +PREHOOK: Output: default@varchar_union1_vc2 +POSTHOOK: query: drop table varchar_union1_vc2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_union1_vc2 +POSTHOOK: Output: default@varchar_union1_vc2 +PREHOOK: query: drop table varchar_union1_str +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_union1_str +PREHOOK: Output: default@varchar_union1_str +POSTHOOK: query: drop table varchar_union1_str +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_union1_str +POSTHOOK: Output: default@varchar_union1_str diff --git serde/if/serde.thrift serde/if/serde.thrift index 9847720..f1a6802 100644 --- serde/if/serde.thrift +++ serde/if/serde.thrift @@ -50,6 +50,8 @@ const string BIGINT_TYPE_NAME = "bigint"; const string FLOAT_TYPE_NAME = "float"; const string DOUBLE_TYPE_NAME = "double"; const string STRING_TYPE_NAME = "string"; +const string CHAR_TYPE_NAME = "char"; +const string VARCHAR_TYPE_NAME = "varchar"; const string DATE_TYPE_NAME = "date"; const string DATETIME_TYPE_NAME = "datetime"; const string TIMESTAMP_TYPE_NAME = "timestamp"; @@ -64,7 +66,7 @@ const string UNION_TYPE_NAME = "uniontype"; const string LIST_COLUMNS 
= "columns"; const string LIST_COLUMN_TYPES = "columns.types"; -const set PrimitiveTypes = [ VOID_TYPE_NAME BOOLEAN_TYPE_NAME TINYINT_TYPE_NAME SMALLINT_TYPE_NAME INT_TYPE_NAME BIGINT_TYPE_NAME FLOAT_TYPE_NAME DOUBLE_TYPE_NAME STRING_TYPE_NAME DATE_TYPE_NAME DATETIME_TYPE_NAME TIMESTAMP_TYPE_NAME DECIMAL_TYPE_NAME BINARY_TYPE_NAME], +const set PrimitiveTypes = [ VOID_TYPE_NAME BOOLEAN_TYPE_NAME TINYINT_TYPE_NAME SMALLINT_TYPE_NAME INT_TYPE_NAME BIGINT_TYPE_NAME FLOAT_TYPE_NAME DOUBLE_TYPE_NAME STRING_TYPE_NAME VARCHAR_TYPE_NAME CHAR_TYPE_NAME DATE_TYPE_NAME DATETIME_TYPE_NAME TIMESTAMP_TYPE_NAME DECIMAL_TYPE_NAME BINARY_TYPE_NAME], const set CollectionTypes = [ LIST_TYPE_NAME MAP_TYPE_NAME ], diff --git serde/src/gen/thrift/gen-cpp/serde_constants.cpp serde/src/gen/thrift/gen-cpp/serde_constants.cpp index 3997026..86a24af 100644 --- serde/src/gen/thrift/gen-cpp/serde_constants.cpp +++ serde/src/gen/thrift/gen-cpp/serde_constants.cpp @@ -57,6 +57,10 @@ serdeConstants::serdeConstants() { STRING_TYPE_NAME = "string"; + CHAR_TYPE_NAME = "char"; + + VARCHAR_TYPE_NAME = "varchar"; + DATE_TYPE_NAME = "date"; DATETIME_TYPE_NAME = "datetime"; @@ -88,6 +92,8 @@ serdeConstants::serdeConstants() { PrimitiveTypes.insert("float"); PrimitiveTypes.insert("double"); PrimitiveTypes.insert("string"); + PrimitiveTypes.insert("varchar"); + PrimitiveTypes.insert("char"); PrimitiveTypes.insert("date"); PrimitiveTypes.insert("datetime"); PrimitiveTypes.insert("timestamp"); diff --git serde/src/gen/thrift/gen-cpp/serde_constants.h serde/src/gen/thrift/gen-cpp/serde_constants.h index 0a63308..117d6c6 100644 --- serde/src/gen/thrift/gen-cpp/serde_constants.h +++ serde/src/gen/thrift/gen-cpp/serde_constants.h @@ -38,6 +38,8 @@ class serdeConstants { std::string FLOAT_TYPE_NAME; std::string DOUBLE_TYPE_NAME; std::string STRING_TYPE_NAME; + std::string CHAR_TYPE_NAME; + std::string VARCHAR_TYPE_NAME; std::string DATE_TYPE_NAME; std::string DATETIME_TYPE_NAME; std::string TIMESTAMP_TYPE_NAME; diff --git serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java index 28f8d6a..096f881 100644 --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java @@ -79,6 +79,10 @@ public static final String STRING_TYPE_NAME = "string"; + public static final String CHAR_TYPE_NAME = "char"; + + public static final String VARCHAR_TYPE_NAME = "varchar"; + public static final String DATE_TYPE_NAME = "date"; public static final String DATETIME_TYPE_NAME = "datetime"; @@ -112,6 +116,8 @@ PrimitiveTypes.add("float"); PrimitiveTypes.add("double"); PrimitiveTypes.add("string"); + PrimitiveTypes.add("varchar"); + PrimitiveTypes.add("char"); PrimitiveTypes.add("date"); PrimitiveTypes.add("datetime"); PrimitiveTypes.add("timestamp"); diff --git serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/test/ThriftTestObj.java serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/test/ThriftTestObj.java index 1b708dd..dda3c5f 100644 --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/test/ThriftTestObj.java +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/test/ThriftTestObj.java @@ -528,7 +528,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, ThriftTestObj struc struct.field3 = new ArrayList(_list0.size); for (int _i1 = 0; _i1 < _list0.size; ++_i1) { - InnerStruct 
_elem2; // required + InnerStruct _elem2; // optional _elem2 = new InnerStruct(); _elem2.read(iprot); struct.field3.add(_elem2); @@ -636,7 +636,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, ThriftTestObj struct struct.field3 = new ArrayList<InnerStruct>(_list5.size); for (int _i6 = 0; _i6 < _list5.size; ++_i6) { - InnerStruct _elem7; // required + InnerStruct _elem7; // optional _elem7 = new InnerStruct(); _elem7.read(iprot); struct.field3.add(_elem7); diff --git serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/Complex.java serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/Complex.java index e36a792..aa404bf 100644 --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/Complex.java +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/Complex.java @@ -836,7 +836,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Complex struct) thr struct.lint = new ArrayList<Integer>(_list0.size); for (int _i1 = 0; _i1 < _list0.size; ++_i1) { - int _elem2; // required + int _elem2; // optional _elem2 = iprot.readI32(); struct.lint.add(_elem2); } @@ -854,7 +854,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Complex struct) thr struct.lString = new ArrayList<String>(_list3.size); for (int _i4 = 0; _i4 < _list3.size; ++_i4) { - String _elem5; // required + String _elem5; // optional _elem5 = iprot.readString(); struct.lString.add(_elem5); } @@ -872,7 +872,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, Complex struct) thr struct.lintString = new ArrayList<IntString>(_list6.size); for (int _i7 = 0; _i7 < _list6.size; ++_i7) { - IntString _elem8; // required + IntString _elem8; // optional _elem8 = new IntString(); _elem8.read(iprot); struct.lintString.add(_elem8); @@ -1074,7 +1074,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, Complex struct) thro struct.lint = new ArrayList<Integer>(_list21.size); for (int _i22 = 0; _i22 < _list21.size; ++_i22) { - int _elem23; // required + int _elem23; // optional _elem23 = iprot.readI32(); struct.lint.add(_elem23); } @@ -1087,7 +1087,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, Complex struct) thro struct.lString = new ArrayList<String>(_list24.size); for (int _i25 = 0; _i25 < _list24.size; ++_i25) { - String _elem26; // required + String _elem26; // optional _elem26 = iprot.readString(); struct.lString.add(_elem26); } @@ -1100,7 +1100,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, Complex struct) thro struct.lintString = new ArrayList<IntString>(_list27.size); for (int _i28 = 0; _i28 < _list27.size; ++_i28) { - IntString _elem29; // required + IntString _elem29; // optional _elem29 = new IntString(); _elem29.read(iprot); struct.lintString.add(_elem29); diff --git serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/MegaStruct.java serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/MegaStruct.java index 386fef9..fba49e4 100644 --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/MegaStruct.java +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde2/thrift/test/MegaStruct.java @@ -2280,7 +2280,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, MegaStruct struct) _val19 = new ArrayList<String>(_list20.size); for (int _i21 = 0; _i21 < _list20.size; ++_i21) { - String _elem22; // required + String _elem22; // optional _elem22 = iprot.readString(); _val19.add(_elem22); } @@ -2310,7 +2310,7 @@ public void
read(org.apache.thrift.protocol.TProtocol iprot, MegaStruct struct) _val26 = new ArrayList(_list27.size); for (int _i28 = 0; _i28 < _list27.size; ++_i28) { - MiniStruct _elem29; // required + MiniStruct _elem29; // optional _elem29 = new MiniStruct(); _elem29.read(iprot); _val26.add(_elem29); @@ -2333,7 +2333,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, MegaStruct struct) struct.my_stringlist = new ArrayList(_list30.size); for (int _i31 = 0; _i31 < _list30.size; ++_i31) { - String _elem32; // required + String _elem32; // optional _elem32 = iprot.readString(); struct.my_stringlist.add(_elem32); } @@ -2351,7 +2351,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, MegaStruct struct) struct.my_structlist = new ArrayList(_list33.size); for (int _i34 = 0; _i34 < _list33.size; ++_i34) { - MiniStruct _elem35; // required + MiniStruct _elem35; // optional _elem35 = new MiniStruct(); _elem35.read(iprot); struct.my_structlist.add(_elem35); @@ -2370,7 +2370,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, MegaStruct struct) struct.my_enumlist = new ArrayList(_list36.size); for (int _i37 = 0; _i37 < _list36.size; ++_i37) { - MyEnum _elem38; // required + MyEnum _elem38; // optional _elem38 = MyEnum.findByValue(iprot.readI32()); struct.my_enumlist.add(_elem38); } @@ -2388,7 +2388,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, MegaStruct struct) struct.my_stringset = new HashSet(2*_set39.size); for (int _i40 = 0; _i40 < _set39.size; ++_i40) { - String _elem41; // required + String _elem41; // optional _elem41 = iprot.readString(); struct.my_stringset.add(_elem41); } @@ -2406,7 +2406,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, MegaStruct struct) struct.my_enumset = new HashSet(2*_set42.size); for (int _i43 = 0; _i43 < _set42.size; ++_i43) { - MyEnum _elem44; // required + MyEnum _elem44; // optional _elem44 = MyEnum.findByValue(iprot.readI32()); struct.my_enumset.add(_elem44); } @@ -2424,7 +2424,7 @@ public void read(org.apache.thrift.protocol.TProtocol iprot, MegaStruct struct) struct.my_structset = new HashSet(2*_set45.size); for (int _i46 = 0; _i46 < _set45.size; ++_i46) { - MiniStruct _elem47; // required + MiniStruct _elem47; // optional _elem47 = new MiniStruct(); _elem47.read(iprot); struct.my_structset.add(_elem47); @@ -3023,7 +3023,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, MegaStruct struct) t _val95 = new ArrayList(_list96.size); for (int _i97 = 0; _i97 < _list96.size; ++_i97) { - String _elem98; // required + String _elem98; // optional _elem98 = iprot.readString(); _val95.add(_elem98); } @@ -3047,7 +3047,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, MegaStruct struct) t _val102 = new ArrayList(_list103.size); for (int _i104 = 0; _i104 < _list103.size; ++_i104) { - MiniStruct _elem105; // required + MiniStruct _elem105; // optional _elem105 = new MiniStruct(); _elem105.read(iprot); _val102.add(_elem105); @@ -3064,7 +3064,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, MegaStruct struct) t struct.my_stringlist = new ArrayList(_list106.size); for (int _i107 = 0; _i107 < _list106.size; ++_i107) { - String _elem108; // required + String _elem108; // optional _elem108 = iprot.readString(); struct.my_stringlist.add(_elem108); } @@ -3077,7 +3077,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, MegaStruct struct) t struct.my_structlist = new ArrayList(_list109.size); for (int _i110 = 0; _i110 < _list109.size; ++_i110) { - MiniStruct 
_elem111; // required + MiniStruct _elem111; // optional _elem111 = new MiniStruct(); _elem111.read(iprot); struct.my_structlist.add(_elem111); @@ -3091,7 +3091,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, MegaStruct struct) t struct.my_enumlist = new ArrayList(_list112.size); for (int _i113 = 0; _i113 < _list112.size; ++_i113) { - MyEnum _elem114; // required + MyEnum _elem114; // optional _elem114 = MyEnum.findByValue(iprot.readI32()); struct.my_enumlist.add(_elem114); } @@ -3104,7 +3104,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, MegaStruct struct) t struct.my_stringset = new HashSet(2*_set115.size); for (int _i116 = 0; _i116 < _set115.size; ++_i116) { - String _elem117; // required + String _elem117; // optional _elem117 = iprot.readString(); struct.my_stringset.add(_elem117); } @@ -3117,7 +3117,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, MegaStruct struct) t struct.my_enumset = new HashSet(2*_set118.size); for (int _i119 = 0; _i119 < _set118.size; ++_i119) { - MyEnum _elem120; // required + MyEnum _elem120; // optional _elem120 = MyEnum.findByValue(iprot.readI32()); struct.my_enumset.add(_elem120); } @@ -3130,7 +3130,7 @@ public void read(org.apache.thrift.protocol.TProtocol prot, MegaStruct struct) t struct.my_structset = new HashSet(2*_set121.size); for (int _i122 = 0; _i122 < _set121.size; ++_i122) { - MiniStruct _elem123; // required + MiniStruct _elem123; // optional _elem123 = new MiniStruct(); _elem123.read(iprot); struct.my_structset.add(_elem123); diff --git serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php index 130c17e..8a0b415 100644 --- serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php +++ serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php @@ -62,6 +62,10 @@ $GLOBALS['serde_CONSTANTS']['DOUBLE_TYPE_NAME'] = "double"; $GLOBALS['serde_CONSTANTS']['STRING_TYPE_NAME'] = "string"; +$GLOBALS['serde_CONSTANTS']['CHAR_TYPE_NAME'] = "char"; + +$GLOBALS['serde_CONSTANTS']['VARCHAR_TYPE_NAME'] = "varchar"; + $GLOBALS['serde_CONSTANTS']['DATE_TYPE_NAME'] = "date"; $GLOBALS['serde_CONSTANTS']['DATETIME_TYPE_NAME'] = "datetime"; @@ -94,6 +98,8 @@ $GLOBALS['serde_CONSTANTS']['PrimitiveTypes'] = array( "float" => true, "double" => true, "string" => true, + "varchar" => true, + "char" => true, "date" => true, "datetime" => true, "timestamp" => true, diff --git serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py index 623bf0e..47aab22 100644 --- serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py +++ serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py @@ -32,6 +32,8 @@ FLOAT_TYPE_NAME = "float" DOUBLE_TYPE_NAME = "double" STRING_TYPE_NAME = "string" +CHAR_TYPE_NAME = "char" +VARCHAR_TYPE_NAME = "varchar" DATE_TYPE_NAME = "date" DATETIME_TYPE_NAME = "datetime" TIMESTAMP_TYPE_NAME = "timestamp" @@ -53,6 +55,8 @@ "float", "double", "string", + "varchar", + "char", "date", "datetime", "timestamp", diff --git serde/src/gen/thrift/gen-rb/serde_constants.rb serde/src/gen/thrift/gen-rb/serde_constants.rb index bd17761..200cefd 100644 --- serde/src/gen/thrift/gen-rb/serde_constants.rb +++ serde/src/gen/thrift/gen-rb/serde_constants.rb @@ -53,6 +53,10 @@ DOUBLE_TYPE_NAME = %q"double" STRING_TYPE_NAME = %q"string" +CHAR_TYPE_NAME = %q"char" + +VARCHAR_TYPE_NAME = %q"varchar" + DATE_TYPE_NAME = 
%q"date" DATETIME_TYPE_NAME = %q"datetime" @@ -85,6 +89,8 @@ PrimitiveTypes = Set.new([ %q"float", %q"double", %q"string", + %q"varchar", + %q"char", %q"date", %q"datetime", %q"timestamp", diff --git serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java index eb64985..4b0ae7f 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; @@ -283,6 +284,13 @@ static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi, Stri sb.append('"'); break; } + case VARCHAR: { + sb.append('"'); + sb.append(escapeString(((HiveVarcharObjectInspector) poi) + .getPrimitiveJavaObject(o).toString())); + sb.append('"'); + break; + } case DATE: { sb.append('"'); sb.append(((DateObjectInspector) poi) diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java index feef48a..9183858 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -91,8 +92,7 @@ private ObjectInspector createObjectInspectorWorker(TypeInfo ti) throws SerDeExc switch(ti.getCategory()) { case PRIMITIVE: PrimitiveTypeInfo pti = (PrimitiveTypeInfo)ti; - result = PrimitiveObjectInspectorFactory - .getPrimitiveJavaObjectInspector(pti.getPrimitiveCategory()); + result = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti); break; case STRUCT: StructTypeInfo sti = (StructTypeInfo)ti; diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java index 0141198..1b4c509 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; @@ -31,6 +33,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import 
org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.AbstractSerDe; @@ -39,6 +42,7 @@ import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -56,14 +60,18 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils.HiveVarcharSerDeHelper; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -276,48 +284,18 @@ static Object deserialize(InputByteBuffer buffer, TypeInfo type, } case STRING: { Text r = reuse == null ? new Text() : (Text) reuse; - // Get the actual length first - int start = buffer.tell(); - int length = 0; - do { - byte b = buffer.read(invert); - if (b == 0) { - // end of string - break; - } - if (b == 1) { - // the last char is an escape char. read the actual char - buffer.read(invert); - } - length++; - } while (true); + return deserializeText(buffer, invert, r); + } - if (length == buffer.tell() - start) { - // No escaping happened, so we are already done. - r.set(buffer.getData(), start, length); - } else { - // Escaping happened, we need to copy byte-by-byte. - // 1. Set the length first. - r.set(buffer.getData(), start, length); - // 2. Reset the pointer. - buffer.seek(start); - // 3. Copy the data. - byte[] rdata = r.getBytes(); - for (int i = 0; i < length; i++) { - byte b = buffer.read(invert); - if (b == 1) { - // The last char is an escape char, read the actual char. - // The serialization format escape \0 to \1, and \1 to \2, - // to make sure the string is null-terminated. - b = (byte) (buffer.read(invert) - 1); - } - rdata[i] = b; - } - // 4. Read the null terminator. - byte b = buffer.read(invert); - assert (b == 0); - } - return r; + case VARCHAR: { + HiveVarcharWritable r = + reuse == null ? 
new HiveVarcharWritable() : (HiveVarcharWritable) reuse; + // Use HiveVarchar's internal Text member to read the value. + deserializeText(buffer, invert, r.getTextValue()); + // If we cache helper data for deserialization we could avoid having + // to call getVarcharMaxLength() on every deserialize call. + r.enforceMaxLength(getVarcharMaxLength(type)); + return r; } case BINARY: { @@ -552,6 +530,60 @@ private static int deserializeInt(InputByteBuffer buffer, boolean invert) throws return v; } + static int getVarcharMaxLength(TypeInfo type) { + VarcharTypeParams typeParams = (VarcharTypeParams) ((PrimitiveTypeInfo) type).getTypeParams(); + if (typeParams != null ) { + return typeParams.length; + } + return -1; + } + + static Text deserializeText(InputByteBuffer buffer, boolean invert, Text r) + throws IOException { + // Get the actual length first + int start = buffer.tell(); + int length = 0; + do { + byte b = buffer.read(invert); + if (b == 0) { + // end of string + break; + } + if (b == 1) { + // the last char is an escape char. read the actual char + buffer.read(invert); + } + length++; + } while (true); + + if (length == buffer.tell() - start) { + // No escaping happened, so we are already done. + r.set(buffer.getData(), start, length); + } else { + // Escaping happened, we need to copy byte-by-byte. + // 1. Set the length first. + r.set(buffer.getData(), start, length); + // 2. Reset the pointer. + buffer.seek(start); + // 3. Copy the data. + byte[] rdata = r.getBytes(); + for (int i = 0; i < length; i++) { + byte b = buffer.read(invert); + if (b == 1) { + // The last char is an escape char, read the actual char. + // The serialization format escape \0 to \1, and \1 to \2, + // to make sure the string is null-terminated. + b = (byte) (buffer.read(invert) - 1); + } + rdata[i] = b; + } + // 4. Read the null terminator. + byte b = buffer.read(invert); + assert (b == 0); + } + return r; + } + BytesWritable serializeBytesWritable = new BytesWritable(); OutputByteBuffer outputByteBuffer = new OutputByteBuffer(); @@ -572,7 +604,7 @@ public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDe } static void serialize(OutputByteBuffer buffer, Object o, ObjectInspector oi, - boolean invert) { + boolean invert) throws SerDeException { // Is this field a null? if (o == null) { buffer.write((byte) 0, invert); @@ -668,6 +700,18 @@ static void serialize(OutputByteBuffer buffer, Object o, ObjectInspector oi, return; } + case VARCHAR: { + HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector)poi; + HiveVarcharWritable hc = hcoi.getPrimitiveWritableObject(o); + try { + ByteBuffer bb = Text.encode(hc.getHiveVarchar().getValue()); + serializeBytes(buffer, bb.array(), bb.limit(), invert); + } catch (CharacterCodingException err) { + throw new SerDeException(err); + } + return; + } + case BINARY: { BinaryObjectInspector baoi = (BinaryObjectInspector) poi; BytesWritable ba = baoi.getPrimitiveWritableObject(o); diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java index 8ebc785..f3a2b1a 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java @@ -17,7 +17,7 @@ */ /** - * This file is back-ported from hadoop-0.19, to make sure hive can run + * This file is back-ported from hadoop-0.19, to make sure hive can run * with hadoop-0.17. 
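[Note] The deserializeText refactoring above exposes BinarySortableSerDe's order-preserving string encoding so VARCHAR can reuse the STRING path before enforcing its max length: on write, \0 becomes \1\1 and \1 becomes \1\2, then a trailing \0 terminates the field, so a raw byte comparison of encoded values matches string order and a prefix always sorts first. A self-contained model of that escaping (plain Java, no Hive classes):

    import java.io.ByteArrayOutputStream;
    import java.util.Arrays;

    public class SortableStringEncoding {
      // Models the escaping used by BinarySortableSerDe above:
      // \0 -> \1\1 and \1 -> \1\2, then a \0 terminator.
      static byte[] encode(byte[] data) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (byte b : data) {
          if (b == 0) { out.write(1); out.write(1); }
          else if (b == 1) { out.write(1); out.write(2); }
          else { out.write(b); }
        }
        out.write(0); // null terminator marks end-of-string
        return out.toByteArray();
      }

      // Mirrors deserializeText: a 0 ends the string, a 1 escapes the next byte.
      static byte[] decode(byte[] enc) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        for (int i = 0; enc[i] != 0; i++) {
          if (enc[i] == 1) {
            i++;
            out.write(enc[i] - 1); // undo the escape
          } else {
            out.write(enc[i]);
          }
        }
        return out.toByteArray();
      }

      public static void main(String[] args) {
        byte[] original = {'a', 0, 'b', 1, 'c'};
        byte[] roundTrip = decode(encode(original));
        System.out.println(Arrays.equals(original, roundTrip)); // true
        // The terminator (0) sorts below every escaped data byte, so
        // "a" still sorts before "ab" after encoding.
      }
    }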
*/ package org.apache.hadoop.hive.serde2.io; diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java new file mode 100644 index 0000000..aaaee10 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.io; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.hive.common.type.HiveBaseChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; + +public class HiveVarcharWritable implements WritableComparable{ + protected Text value = new Text(); + transient protected int characterLength = -1; + + public HiveVarcharWritable() { + } + + public HiveVarcharWritable(HiveVarchar hc) { + set(hc); + } + + public HiveVarcharWritable(HiveVarcharWritable hcw) { + set(hcw); + } + + public void set(HiveVarchar val) { + set(val.getValue()); + } + + public void set(String val) { + set(val, -1); // copy entire string value + } + + public void set(HiveVarcharWritable val) { + value.set(val.value); + characterLength = val.characterLength; + } + + public void set(HiveVarcharWritable val, int maxLength) { + // Copy as-is only when the source's known character length already fits the limit. + if (val.characterLength > 0 && val.characterLength <= maxLength) { + set(val); + } else { + set(val.getHiveVarchar(), maxLength); + } + } + + public void set(HiveVarchar val, int len) { + set(val.getValue(), len); + } + + public void set(String val, int maxLength) { + value.set(HiveBaseChar.enforceMaxLength(val, maxLength)); + } + + public HiveVarchar getHiveVarchar() { + return new HiveVarchar(value.toString(), -1); + } + + public int getCharacterLength() { + if (characterLength < 0) { + characterLength = getHiveVarchar().getCharacterLength(); + } + return characterLength; + } + + public void enforceMaxLength(int maxLength) { + // Might be possible to truncate the existing Text value, for now just do something simple.
+ set(getHiveVarchar(), maxLength); + } + + public void readFields(DataInput in) throws IOException { + value.readFields(in); + } + + public void write(DataOutput out) throws IOException { + value.write(out); + } + + public int compareTo(HiveVarcharWritable rhs) { + return ShimLoader.getHadoopShims().compareText(value, rhs.value); + } + + public boolean equals(Object obj) { + if (obj == null || !(obj instanceof HiveVarcharWritable)) { + return false; + } + return value.equals(((HiveVarcharWritable)obj).value); + } + + @Override + public String toString() { + return value.toString(); + } + + public int hashCode() { + return value.hashCode(); + } + + /** + * Access to the internal Text member. Use with care. + * @return + */ + public Text getTextValue() { + return value; + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java index a0ff609..67f032c 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyFloatObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyLongObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory; @@ -113,6 +114,8 @@ return new LazyDouble((LazyDoubleObjectInspector) oi); case STRING: return new LazyString((LazyStringObjectInspector) oi); + case VARCHAR: + return new LazyHiveVarchar((LazyHiveVarcharObjectInspector) oi); case DATE: return new LazyDate((LazyDateObjectInspector) oi); case TIMESTAMP: diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java new file mode 100644 index 0000000..1286cba --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java @@ -0,0 +1,73 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
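[Note] HiveVarcharWritable above keeps the value in a Hadoop Text (so serialization and comparison work on raw bytes) and computes the character length lazily from code points, deferring truncation to HiveBaseChar.enforceMaxLength; copying between writables skips re-enforcement only when the source's known character length already fits the target limit. A short usage sketch (assumes the classes from this patch are on the classpath):

    import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;

    public class VarcharWritableDemo {
      public static void main(String[] args) {
        HiveVarcharWritable w = new HiveVarcharWritable();
        w.set("hello world", 5);                     // truncates on write
        System.out.println(w);                       // hello
        System.out.println(w.getCharacterLength());  // 5, computed lazily

        HiveVarcharWritable copy = new HiveVarcharWritable();
        copy.set(w, 3);                              // 5 > 3, so re-enforce
        System.out.println(copy);                    // hel
      }
    }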
+ */ +package org.apache.hadoop.hive.serde2.lazy; + +import java.nio.charset.CharacterCodingException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; +import org.apache.hadoop.io.Text; + +/** + * LazyObject for storing a value of HiveVarchar. + * + */ +public class LazyHiveVarchar extends + LazyPrimitive { + + private static final Log LOG = LogFactory.getLog(LazyHiveVarchar.class); + + protected int maxLength = -1; + + public LazyHiveVarchar(LazyHiveVarcharObjectInspector oi) { + super(oi); + VarcharTypeParams typeParams = (VarcharTypeParams)oi.getTypeParams(); + if (typeParams == null) { + throw new RuntimeException("varchar type used without type params"); + } + maxLength = typeParams.getLength(); + data = new HiveVarcharWritable(); + } + + public LazyHiveVarchar(LazyHiveVarchar copy) { + super(copy); + this.maxLength = copy.maxLength; + data = new HiveVarcharWritable(copy.data); + } + + public void setValue(LazyHiveVarchar copy) { + data.set(copy.data, maxLength); + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + String byteData = null; + try { + byteData = Text.decode(bytes.getData(), start, length); + data.set(byteData, maxLength); + isNull = false; + } catch (CharacterCodingException e) { + isNull = true; + LOG.debug("Data not in the HiveVarchar data type range so converted to null.", e); + } + } + +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java index 8a538c0..05822cb 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; @@ -39,6 +40,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; @@ -225,6 +227,12 @@ public static void writePrimitiveUTF8(OutputStream out, Object o, break; } + case VARCHAR: { + HiveVarcharWritable hc = ((HiveVarcharObjectInspector)oi).getPrimitiveWritableObject(o); + ByteBuffer b = Text.encode(hc.toString()); + writeEscaped(out, b.array(), 0, b.limit(), escaped, escapeChar, needsEscape); + break; + } case BINARY: { BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o); byte[] toEncode = new byte[bw.getLength()]; diff --git 
serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveVarcharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveVarcharObjectInspector.java new file mode 100644 index 0000000..dde9b96 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveVarcharObjectInspector.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive; + + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveVarchar; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; + +public class LazyHiveVarcharObjectInspector + extends AbstractPrimitiveLazyObjectInspector + implements HiveVarcharObjectInspector { + + public LazyHiveVarcharObjectInspector(PrimitiveTypeEntry typeEntry) { + super(typeEntry); + if (typeEntry.primitiveCategory != PrimitiveCategory.VARCHAR) { + throw new RuntimeException( + "TypeEntry of type varchar expected, got " + typeEntry.primitiveCategory); + } + } + + @Override + public Object copyObject(Object o) { + if (o == null) { + return null; + } + + LazyHiveVarchar ret = new LazyHiveVarchar(this); + ret.setValue((LazyHiveVarchar) o); + return ret; + } + + @Override + public HiveVarchar getPrimitiveJavaObject(Object o) { + if (o == null) { + return null; + } + + HiveVarchar ret = ((LazyHiveVarchar) o).getWritableObject().getHiveVarchar(); + if (!ParameterizedPrimitiveTypeUtils.doesPrimitiveMatchTypeParams( + ret, (VarcharTypeParams) typeParams)) { + HiveVarchar newValue = new HiveVarchar(ret, ((VarcharTypeParams) typeParams).length); + return newValue; + } + return ret; + } + + public String toString() { + return getTypeName(); + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java index 2f09224..e28eef7 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java +++ 
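[Note] LazyHiveVarchar.init above decodes the raw bytes as strict UTF-8, truncates to the declared maximum, and turns undecodable input into NULL rather than failing the row. A dependency-free model of that decision (the strict decoder configuration mirrors what Text.decode does; treat the helper as an approximation, not the patch's code):

    import java.nio.ByteBuffer;
    import java.nio.charset.CharacterCodingException;
    import java.nio.charset.CharsetDecoder;
    import java.nio.charset.CodingErrorAction;
    import java.nio.charset.StandardCharsets;

    public class LazyVarcharDecodeDemo {
      // Models LazyHiveVarchar.init: decode bytes as UTF-8, then
      // truncate to the declared maximum; malformed input becomes NULL.
      static String decodeOrNull(byte[] bytes, int start, int length, int maxLength) {
        CharsetDecoder dec = StandardCharsets.UTF_8.newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
        try {
          String s = dec.decode(ByteBuffer.wrap(bytes, start, length)).toString();
          int chars = s.codePointCount(0, s.length());
          return chars <= maxLength ? s : s.substring(0, s.offsetByCodePoints(0, maxLength));
        } catch (CharacterCodingException e) {
          return null; // the lazy object sets isNull instead of failing the row
        }
      }

      public static void main(String[] args) {
        byte[] good = "varchar demo".getBytes(StandardCharsets.UTF_8);
        System.out.println(decodeOrNull(good, 0, good.length, 7)); // varchar
        byte[] bad = {(byte) 0xC0, (byte) 0x00}; // invalid UTF-8 sequence
        System.out.println(decodeOrNull(bad, 0, bad.length, 7));   // null
      }
    }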
serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java @@ -21,9 +21,11 @@ import java.util.ArrayList; import java.util.HashMap; +import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeSpec; @@ -65,6 +67,8 @@ new LazyBinaryObjectInspector(); public static final LazyHiveDecimalObjectInspector LAZY_BIG_DECIMAL_OBJECT_INSPECTOR = new LazyHiveDecimalObjectInspector(); + public static final LazyHiveVarcharObjectInspector LAZY_VARCHAR_OBJECT_INSPECTOR = + new LazyHiveVarcharObjectInspector(PrimitiveObjectInspectorUtils.varcharTypeEntry); static HashMap, LazyStringObjectInspector> cachedLazyStringObjectInspector = new HashMap, LazyStringObjectInspector>(); @@ -96,8 +100,14 @@ public static PrimitiveObjectInspector getParameterizedObjectInspector( if (poi == null) { // Object inspector hasn't been cached for this type/params yet, create now switch (primitiveCategory) { - // Get type entry for parameterized type, and create new object inspector for type - // Currently no parameterized types + case VARCHAR: + PrimitiveTypeEntry typeEntry = PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs( + primitiveCategory, + typeParams); + poi = new LazyHiveVarcharObjectInspector(typeEntry); + poi.setTypeParams(typeParams); + cachedParameterizedLazyObjectInspectors.setObjectInspector(poi); + break; default: throw new RuntimeException( @@ -126,6 +136,8 @@ public static PrimitiveObjectInspector getParameterizedObjectInspector( return LAZY_DOUBLE_OBJECT_INSPECTOR; case STRING: return getLazyStringObjectInspector(escaped, escapeChar); + case VARCHAR: + return LAZY_VARCHAR_OBJECT_INSPECTOR; case BINARY: return LAZY_BINARY_OBJECT_INSPECTOR; case VOID: @@ -151,7 +163,10 @@ public static PrimitiveObjectInspector getParameterizedObjectInspector( return getLazyObjectInspector(primitiveCategory, escaped, escapeChar); } else { switch(primitiveCategory) { - // call getParameterizedObjectInspector(). 
But no parameterized types yet + case VARCHAR: + LazyHiveVarcharObjectInspector oi = (LazyHiveVarcharObjectInspector) + getParameterizedObjectInspector(typeSpec); + return oi; default: throw new RuntimeException("Type " + primitiveCategory + " does not take parameters"); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java index 48b3c05..c3d31f1 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; @@ -71,6 +72,8 @@ return new LazyBinaryDouble((WritableDoubleObjectInspector) oi); case STRING: return new LazyBinaryString((WritableStringObjectInspector) oi); + case VARCHAR: + return new LazyBinaryHiveVarchar((WritableHiveVarcharObjectInspector) oi); case VOID: // for NULL return new LazyBinaryVoid((WritableVoidObjectInspector) oi); case DATE: diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveVarchar.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveVarchar.java new file mode 100644 index 0000000..fb1b0ea --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveVarchar.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
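[Note] The factory hunks above introduce a second cache level: parameterized inspectors are keyed by their type parameters as well as their primitive category, so varchar(50) and varchar(100) get distinct inspector instances while repeated varchar(50) lookups share one. A generic sketch of that caching shape (class and field names are illustrative, not the patch's cache types):

    import java.util.Objects;
    import java.util.concurrent.ConcurrentHashMap;

    public class ParameterizedOICache<OI> {
      // One inspector instance per (category, type params), created on first use.
      private final ConcurrentHashMap<Key, OI> cache = new ConcurrentHashMap<>();

      static final class Key {
        final String category;  // e.g. "VARCHAR"
        final int maxLength;    // e.g. 50, the varchar length parameter
        Key(String category, int maxLength) {
          this.category = category;
          this.maxLength = maxLength;
        }
        @Override public boolean equals(Object o) {
          return o instanceof Key && ((Key) o).category.equals(category)
              && ((Key) o).maxLength == maxLength;
        }
        @Override public int hashCode() { return Objects.hash(category, maxLength); }
      }

      OI getOrCreate(String category, int maxLength,
          java.util.function.Function<Key, OI> factory) {
        return cache.computeIfAbsent(new Key(category, maxLength), factory);
      }
    }

The unparameterized types keep their existing singleton inspectors; only types that take parameters fall through to this per-parameters cache.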
+ */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; +import org.apache.hadoop.io.Text; + +public class LazyBinaryHiveVarchar extends + LazyBinaryPrimitive { + + protected int maxLength = -1; + + LazyBinaryHiveVarchar(WritableHiveVarcharObjectInspector oi) { + super(oi); + // Check for params + VarcharTypeParams typeParams = (VarcharTypeParams)oi.getTypeParams(); + if (typeParams == null) { + throw new RuntimeException("varchar type used without type params"); + } + maxLength = typeParams.length; + data = new HiveVarcharWritable(); + } + + LazyBinaryHiveVarchar(LazyBinaryHiveVarchar copy) { + super(copy); + maxLength = copy.maxLength; + data = new HiveVarcharWritable(copy.data); + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + // re-use existing text member in varchar writable + Text textValue = data.getTextValue(); + textValue.set(bytes.getData(), start, length); + data.enforceMaxLength(maxLength); + } + +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java index 76acc6a..77a1951 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hive.serde2.lazybinary; +import java.nio.ByteBuffer; +import java.nio.charset.CharacterCodingException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -44,6 +46,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; @@ -231,7 +235,7 @@ public Writable serialize(Object obj, ObjectInspector objInspector) * once already */ private static boolean serializeStruct(Output byteStream, Object obj, - StructObjectInspector soi, boolean warnedOnceNullMapKey) { + StructObjectInspector soi, boolean warnedOnceNullMapKey) throws SerDeException { // do nothing for null struct if (null == obj) { return warnedOnceNullMapKey; @@ -284,7 +288,8 @@ private static boolean serializeStruct(Output byteStream, Object obj, * once already */ public static boolean serialize(Output byteStream, Object obj, - ObjectInspector objInspector, boolean skipLengthPrefix, boolean warnedOnceNullMapKey) { + ObjectInspector objInspector, boolean skipLengthPrefix, boolean warnedOnceNullMapKey) + throws SerDeException { // do nothing for null object if (null == obj) { @@ -363,7 +368,24 @@ public static boolean serialize(Output byteStream, Object obj, byteStream.write(data, 0, length); return warnedOnceNullMapKey; } - + case VARCHAR: { + HiveVarcharObjectInspector hcoi = 
(HiveVarcharObjectInspector) poi; + String value = + hcoi.getPrimitiveWritableObject(obj).getHiveVarchar().getValue(); + int length = value.length(); + // Write byte size + if (!skipLengthPrefix) { + LazyBinaryUtils.writeVInt(byteStream, length); + } + // Write string value + try { + ByteBuffer bb = Text.encode(value); + byteStream.write(bb.array(), 0, bb.limit()); + } catch (CharacterCodingException err) { + throw new SerDeException(err); + } + return warnedOnceNullMapKey; + } case BINARY: { BinaryObjectInspector baoi = (BinaryObjectInspector) poi; BytesWritable bw = baoi.getPrimitiveWritableObject(obj); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java index a22c04d..f8a44b7 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java @@ -196,6 +196,11 @@ public static void checkObjectByteInfo(ObjectInspector objectInspector, recordInfo.elementSize = vInt.value; break; + case VARCHAR: + LazyBinaryUtils.readVInt(bytes, offset, vInt); + recordInfo.elementOffset = vInt.length; + recordInfo.elementSize = vInt.value; + break; case BINARY: // using vint instead of 4 bytes LazyBinaryUtils.readVInt(bytes, offset, vInt); diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java index 00c95e6..8f700b3a 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java @@ -31,6 +31,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableByteObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableIntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableLongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableShortObjectInspector; @@ -101,6 +103,10 @@ private static Converter getConverter(PrimitiveObjectInspector inputOI, return new PrimitiveObjectInspectorConverter.StringConverter( inputOI); } + case VARCHAR: + return new PrimitiveObjectInspectorConverter.HiveVarcharConverter( + inputOI, + (SettableHiveVarcharObjectInspector) outputOI); case DATE: return new PrimitiveObjectInspectorConverter.DateConverter( inputOI, diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index db7028a..ec51dd5 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import 
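[Note] One detail worth flagging in the LazyBinarySerDe VARCHAR case above: the vint prefix written is value.length(), a UTF-16 char count, while the payload written is bb.limit() UTF-8 bytes. Those agree only for ASCII data; the STRING case, and the VARCHAR branch of checkObjectByteInfo (which consumes the vint as the element's byte size), suggest the byte count is what the reader expects. A small demonstration of where the two counts diverge:

    import java.nio.ByteBuffer;
    import java.nio.charset.StandardCharsets;

    public class VarcharPrefixDemo {
      public static void main(String[] args) {
        String ascii = "abc";
        String accented = "caf\u00e9"; // 4 chars, 5 UTF-8 bytes

        for (String s : new String[] {ascii, accented}) {
          ByteBuffer bb = StandardCharsets.UTF_8.encode(s);
          // The hunk above would prefix s.length(); the payload is
          // bb.limit() bytes. Prefixing the byte count instead keeps
          // the reader's elementSize consistent with the bytes written.
          System.out.println(s + ": chars=" + s.length() + " bytes=" + bb.limit());
        }
      }
    }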
org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; @@ -43,6 +44,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; @@ -484,6 +486,8 @@ public static int hashCode(Object o, ObjectInspector objIns) { } return r; } + case VARCHAR: + return ((HiveVarcharObjectInspector)poi).getPrimitiveWritableObject(o).hashCode(); case BINARY: return ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o).hashCode(); @@ -679,6 +683,11 @@ public static int compare(Object o1, ObjectInspector oi1, Object o2, .compareTo(s2)); } } + case VARCHAR: { + HiveVarcharWritable t1 = ((HiveVarcharObjectInspector)poi1).getPrimitiveWritableObject(o1); + HiveVarcharWritable t2 = ((HiveVarcharObjectInspector)poi2).getPrimitiveWritableObject(o2); + return t1.compareTo(t2); + } case BINARY: { BytesWritable bw1 = ((BinaryObjectInspector) poi1).getPrimitiveWritableObject(o1); BytesWritable bw2 = ((BinaryObjectInspector) poi2).getPrimitiveWritableObject(o2); @@ -948,7 +957,7 @@ public static ConstantObjectInspector getConstantObjectInspector(ObjectInspector case PRIMITIVE: PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; return PrimitiveObjectInspectorFactory.getPrimitiveWritableConstantObjectInspector( - poi.getPrimitiveCategory(), writableValue); + poi, writableValue); case LIST: ListObjectInspector loi = (ListObjectInspector) oi; return ObjectInspectorFactory.getStandardConstantListObjectInspector( diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java index 353a99c..21947ff 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java @@ -31,7 +31,7 @@ */ public static enum PrimitiveCategory { VOID, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, - DATE, TIMESTAMP, BINARY, DECIMAL, UNKNOWN + DATE, TIMESTAMP, BINARY, DECIMAL, VARCHAR, UNKNOWN }; /** diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/HiveVarcharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/HiveVarcharObjectInspector.java new file mode 100644 index 0000000..a3ae161 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/HiveVarcharObjectInspector.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; + +public interface HiveVarcharObjectInspector extends PrimitiveObjectInspector { + HiveVarcharWritable getPrimitiveWritableObject(Object o); + + HiveVarchar getPrimitiveJavaObject(Object o); +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaHiveVarcharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaHiveVarcharObjectInspector.java new file mode 100644 index 0000000..c260d06 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaHiveVarcharObjectInspector.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
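[Note] The HiveVarcharObjectInspector contract above exposes both object models, the Java-side HiveVarchar and the serialization-side HiveVarcharWritable, and every path that honors the declared length funnels through code-point-based truncation, so a supplementary character (two UTF-16 chars) is never split in half. A plain-Java sketch of that truncation rule, mirroring the shared enforceMaxLength helper referenced earlier in the patch:

    public class CodePointTruncateDemo {
      // Counts code points, not Java chars, so surrogate pairs stay whole.
      static String truncate(String val, int maxLength) {
        if (maxLength > 0 && val.codePointCount(0, val.length()) > maxLength) {
          return val.substring(0, val.offsetByCodePoints(0, maxLength));
        }
        return val;
      }

      public static void main(String[] args) {
        String s = "ab" + new String(Character.toChars(0x1F600)) + "cd";
        System.out.println(s.length());                       // 6 UTF-16 chars
        String t = truncate(s, 3);                            // keeps "ab" + emoji
        System.out.println(t.codePointCount(0, t.length()));  // 3
        System.out.println(t.length());                       // 4: emoji kept whole
      }
    }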
+ */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; + +public class JavaHiveVarcharObjectInspector + extends AbstractPrimitiveJavaObjectInspector + implements SettableHiveVarcharObjectInspector { + + public JavaHiveVarcharObjectInspector(PrimitiveTypeEntry typeEntry) { + super(typeEntry); + if (typeEntry.primitiveCategory != PrimitiveCategory.VARCHAR) { + throw new RuntimeException( + "TypeEntry of type varchar expected, got " + typeEntry.primitiveCategory); + } + } + + public HiveVarchar getPrimitiveJavaObject(Object o) { + if (o == null) { + return null; + } + HiveVarchar value = (HiveVarchar)o; + if (ParameterizedPrimitiveTypeUtils.doesPrimitiveMatchTypeParams( + value, (VarcharTypeParams) typeParams)) { + return value; + } + // value needs to be converted to match the type params (length, etc). + return getPrimitiveWithParams(value); + } + + @Override + public HiveVarcharWritable getPrimitiveWritableObject(Object o) { + if (o == null) { + return null; + } + return getWritableWithParams((HiveVarchar)o); + } + + private HiveVarchar getPrimitiveWithParams(HiveVarchar val) { + HiveVarchar hv = new HiveVarchar(val, getMaxLength()); + return hv; + } + + private HiveVarcharWritable getWritableWithParams(HiveVarchar val) { + HiveVarcharWritable newValue = new HiveVarcharWritable(); + newValue.set(val, getMaxLength()); + return newValue; + } + + @Override + public Object set(Object o, HiveVarchar value) { + HiveVarchar setValue = (HiveVarchar)o; + if (ParameterizedPrimitiveTypeUtils.doesPrimitiveMatchTypeParams( + value, (VarcharTypeParams) typeParams)) { + setValue.setValue(value); + } else { + // Otherwise value may be too long, convert to appropriate value based on params + setValue.setValue(value, getMaxLength()); + } + + return setValue; + } + + @Override + public Object set(Object o, String value) { + HiveVarchar convertedValue = (HiveVarchar)o; + convertedValue.setValue(value, getMaxLength()); + return convertedValue; + } + + @Override + public Object create(HiveVarchar value) { + HiveVarchar hc = new HiveVarchar(value, getMaxLength()); + return hc; + } + + public int getMaxLength() { + return typeParams != null ? 
((VarcharTypeParams) typeParams).length : -1; + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java index 0f9df78..ac105d3 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java @@ -22,11 +22,14 @@ import java.sql.Timestamp; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.lazy.LazyInteger; import org.apache.hadoop.hive.serde2.lazy.LazyLong; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; import org.apache.hadoop.io.Text; /** @@ -394,6 +397,14 @@ public Text convert(Object input) { t.set(((StringObjectInspector) inputOI).getPrimitiveJavaObject(input)); } return t; + case VARCHAR: + if (inputOI.preferWritable()) { + t.set(((HiveVarcharObjectInspector) inputOI).getPrimitiveWritableObject(input) + .toString()); + } else { + t.set(((HiveVarcharObjectInspector) inputOI).getPrimitiveJavaObject(input).toString()); + } + return t; case DATE: t.set(((DateObjectInspector) inputOI).getPrimitiveWritableObject(input).toString()); return t; @@ -430,4 +441,40 @@ public Object convert(Object input) { } } + + public static class HiveVarcharConverter implements Converter { + + PrimitiveObjectInspector inputOI; + SettableHiveVarcharObjectInspector outputOI; + HiveVarcharWritable hc; + + public HiveVarcharConverter(PrimitiveObjectInspector inputOI, + SettableHiveVarcharObjectInspector outputOI) { + this.inputOI = inputOI; + this.outputOI = outputOI; + VarcharTypeParams typeParams = (VarcharTypeParams) outputOI.getTypeParams(); + + // unfortunately we seem to get instances of varchar object inspectors without params + // when an old-style UDF has an evaluate() method with varchar arguments. + // If we disallow varchar in old-style UDFs and only allow GenericUDFs to be defined + // with varchar arguments, then we might be able to enforce this properly. + //if (typeParams == null) { + // throw new RuntimeException("varchar type used without type params"); + //} + hc = new HiveVarcharWritable(); + } + + @Override + public Object convert(Object input) { + switch (inputOI.getPrimitiveCategory()) { + case BOOLEAN: + return outputOI.set(hc, + ((BooleanObjectInspector) inputOI).get(input) ? 
+ new HiveVarchar("TRUE", -1) : new HiveVarchar("FALSE", -1)); + default: + return outputOI.set(hc, PrimitiveObjectInspectorUtils.getHiveVarchar(input, inputOI)); + } + } + + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java index 12e06dd..c74f24d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; @@ -33,6 +34,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeSpec; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -68,6 +70,8 @@ new JavaDoubleObjectInspector(); public static final JavaStringObjectInspector javaStringObjectInspector = new JavaStringObjectInspector(); + public static final JavaHiveVarcharObjectInspector javaHiveVarcharObjectInspector = + new JavaHiveVarcharObjectInspector(PrimitiveObjectInspectorUtils.varcharTypeEntry); public static final JavaVoidObjectInspector javaVoidObjectInspector = new JavaVoidObjectInspector(); public static final JavaDateObjectInspector javaDateObjectInspector = @@ -95,6 +99,8 @@ new WritableDoubleObjectInspector(); public static final WritableStringObjectInspector writableStringObjectInspector = new WritableStringObjectInspector(); + public static final WritableHiveVarcharObjectInspector writableHiveVarcharObjectInspector = + new WritableHiveVarcharObjectInspector(PrimitiveObjectInspectorUtils.varcharTypeEntry); public static final WritableVoidObjectInspector writableVoidObjectInspector = new WritableVoidObjectInspector(); public static final WritableDateObjectInspector writableDateObjectInspector = @@ -125,6 +131,8 @@ writableDoubleObjectInspector); cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.STRING, writableStringObjectInspector); + cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.VARCHAR, + writableHiveVarcharObjectInspector); cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.VOID, writableVoidObjectInspector); cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.DATE, @@ -156,6 +164,8 @@ javaDoubleObjectInspector); cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.STRING, javaStringObjectInspector); + cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.VARCHAR, + javaHiveVarcharObjectInspector); cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.VOID, javaVoidObjectInspector); cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.DATE, @@ -229,7 +239,14 @@ public static AbstractPrimitiveWritableObjectInspector getPrimitiveWritableObjec if (oi == null) { // Do a 
bit of validation - not all primitive types use parameters. switch (primitiveCategory) { - // Currently no parameterized types + case VARCHAR: + PrimitiveTypeEntry typeEntry = PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs( + primitiveCategory, + primitiveTypeParams); + oi = new WritableHiveVarcharObjectInspector(typeEntry); + oi.setTypeParams(primitiveTypeParams); + cachedParameterizedPrimitiveWritableObjectInspectorCache.setObjectInspector(oi); + break; default: throw new RuntimeException( "Primitve type " + primitiveCategory + " should not take parameters"); @@ -248,6 +265,24 @@ public static AbstractPrimitiveWritableObjectInspector getPrimitiveWritableObjec */ public static ConstantObjectInspector getPrimitiveWritableConstantObjectInspector( PrimitiveCategory primitiveCategory, Object value) { + return getPrimitiveWritableConstantObjectInspector( + PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs(primitiveCategory, null), + value); + } + + /** + * Returns a PrimitiveWritableObjectInspector which implements ConstantObjectInspector + * for the given type specification. + * + * @param typeSpecs the type's primitive category plus any type + * qualifiers (if applicable) + * @param value + */ + public static ConstantObjectInspector getPrimitiveWritableConstantObjectInspector( + PrimitiveTypeSpec typeSpecs, Object value) { + PrimitiveCategory primitiveCategory = typeSpecs.getPrimitiveCategory(); + BaseTypeParams typeParams = typeSpecs.getTypeParams(); + switch (primitiveCategory) { case BOOLEAN: return new WritableConstantBooleanObjectInspector((BooleanWritable)value); @@ -265,6 +300,9 @@ public static ConstantObjectInspector getPrimitiveWritableConstantObjectInspecto return new WritableConstantDoubleObjectInspector((DoubleWritable)value); case STRING: return new WritableConstantStringObjectInspector((Text)value); + case VARCHAR: + return new WritableConstantHiveVarcharObjectInspector((HiveVarcharWritable)value, + (VarcharTypeParams) typeParams); case DATE: return new WritableConstantDateObjectInspector((DateWritable)value); case TIMESTAMP: @@ -328,8 +366,14 @@ public static AbstractPrimitiveJavaObjectInspector getPrimitiveJavaObjectInspect if (oi == null) { // Do a bit of validation - not all primitive types use parameters. 
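+ // As in the writable-inspector path above, VARCHAR is currently the only primitive category that accepts type parameters; any other parameterized request is rejected below.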
switch (primitiveCategory) { - // Create type info and add to cache - // Currently no existing parameterized types + case VARCHAR: + PrimitiveTypeEntry typeEntry = PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs( + primitiveCategory, + primitiveTypeParams); + oi = new JavaHiveVarcharObjectInspector(typeEntry); + oi.setTypeParams(primitiveTypeParams); + cachedParameterizedPrimitiveJavaObjectInspectorCache.setObjectInspector(oi); + break; default: throw new RuntimeException( "Primitve type " + primitiveCategory + " should not take parameters"); diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java index 80c0d86..6bfc584 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java @@ -29,12 +29,14 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.lazy.LazyInteger; @@ -45,6 +47,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeSpec; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -172,7 +175,7 @@ public PrimitiveTypeEntry addParameters(String[] parameters) { return result; } catch (Exception err) { LOG.error("Error while setting type parameters: " + err); - return null; + throw new RuntimeException(err); } } @@ -212,7 +215,8 @@ public static BaseTypeParams createTypeParams(String typeName, String[] paramete return null; } } catch (Exception err) { - throw new SerDeException("Error creating type params for " + typeName, err); + throw new SerDeException("Error creating type params for " + typeName + + ": " + err, err); } } @@ -296,6 +300,9 @@ static void registerType(PrimitiveTypeEntry t) { public static final PrimitiveTypeEntry decimalTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.DECIMAL, serdeConstants.DECIMAL_TYPE_NAME, null, HiveDecimal.class, HiveDecimalWritable.class, null); + public static final PrimitiveTypeEntry varcharTypeEntry = new PrimitiveTypeEntry( + PrimitiveCategory.VARCHAR, serdeConstants.VARCHAR_TYPE_NAME, null, HiveVarchar.class, + HiveVarcharWritable.class, VarcharTypeParams.class); // The following is a complex type for special handling public static final PrimitiveTypeEntry unknownTypeEntry = new PrimitiveTypeEntry( @@ -304,6 +311,7 @@ static void registerType(PrimitiveTypeEntry t) { static { registerType(binaryTypeEntry); 
registerType(stringTypeEntry); + registerType(varcharTypeEntry); registerType(booleanTypeEntry); registerType(intTypeEntry); registerType(longTypeEntry); @@ -428,14 +436,24 @@ public static PrimitiveTypeEntry getTypeEntryFromTypeName(String typeName) { public static PrimitiveTypeEntry getTypeEntryFromTypeSpecs( PrimitiveCategory primitiveCategory, BaseTypeParams typeParams) { - String typeString = primitiveCategory.toString().toLowerCase(); - if (typeParams != null) { - typeString += typeParams.toString(); + if (typeParams == null) { + // No type params, can just use the primitive category + return getTypeEntryFromPrimitiveCategory(primitiveCategory); } + + // Type params were passed in. First check for cached version + String typeString = primitiveCategory.toString().toLowerCase(); + typeString += typeParams.toString(); PrimitiveTypeEntry typeEntry = getTypeEntryFromTypeName(typeString); if (typeEntry == null) { // Parameterized type doesn't exist yet, create now. - typeEntry = (PrimitiveTypeEntry)getTypeEntryFromTypeSpecs(primitiveCategory, null).clone(); + typeEntry = + (PrimitiveTypeEntry) getTypeEntryFromPrimitiveCategory(primitiveCategory).clone(); + if (!typeEntry.isParameterized()) { + throw new IllegalArgumentException( + primitiveCategory + " type was being used with type parameters " + + typeParams + ", which should not be allowed"); + } typeEntry.typeParams = typeParams; addParameterizedType(typeEntry); } @@ -491,6 +509,10 @@ public static boolean comparePrimitiveObjects(Object o1, .getPrimitiveWritableObject(o2); return t1.equals(t2); } + case VARCHAR: { + return ((HiveVarcharObjectInspector)oi1).getPrimitiveWritableObject(o1) + .equals(((HiveVarcharObjectInspector)oi2).getPrimitiveWritableObject(o2)); + } case DATE: { return ((DateObjectInspector) oi1).getPrimitiveWritableObject(o1) .equals(((DateObjectInspector) oi2).getPrimitiveWritableObject(o2)); @@ -694,6 +716,10 @@ public static int getInt(Object o, PrimitiveObjectInspector oi) { } break; } + case VARCHAR: { + result = Integer.parseInt(getString(o, oi)); + break; + } case TIMESTAMP: result = (int) (((TimestampObjectInspector) oi) .getPrimitiveWritableObject(o).getSeconds()); @@ -753,6 +779,10 @@ public static long getLong(Object o, PrimitiveObjectInspector oi) { result = Long.parseLong(s); } break; + case VARCHAR: { + result = Long.parseLong(getString(o, oi)); + break; + } case TIMESTAMP: result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o) .getSeconds(); @@ -806,6 +836,9 @@ public static double getDouble(Object o, PrimitiveObjectInspector oi) { String s = soi.getPrimitiveJavaObject(o); result = Double.parseDouble(s); break; + case VARCHAR: + result = Double.parseDouble(getString(o, oi)); + break; case TIMESTAMP: result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o).getDouble(); break; @@ -871,6 +904,10 @@ public static String getString(Object o, PrimitiveObjectInspector oi) { StringObjectInspector soi = (StringObjectInspector) oi; result = soi.getPrimitiveJavaObject(o); break; + case VARCHAR: + HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) oi; + result = hcoi.getPrimitiveJavaObject(o).toString(); + break; case DATE: result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o).toString(); break; @@ -888,6 +925,28 @@ public static String getString(Object o, PrimitiveObjectInspector oi) { return result; } + public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi) { + + if (o == null) { + return null; + } + + HiveVarchar result = null; + switch 
(oi.getPrimitiveCategory()) { + case VARCHAR: + result = ((HiveVarcharObjectInspector)oi).getPrimitiveJavaObject(o); + break; + default: + // Is there a way to provide char length here? + // It might actually be ok as long as there is an object inspector (with char length) + // receiving this value. + result = new HiveVarchar(); + result.setValue(getString(o, oi)); + break; + } + return result; + } + public static BytesWritable getBinary(Object o, PrimitiveObjectInspector oi) { if (null == o) { @@ -951,6 +1010,9 @@ public static HiveDecimal getHiveDecimal(Object o, PrimitiveObjectInspector oi) case STRING: result = new HiveDecimal(((StringObjectInspector) oi).getPrimitiveJavaObject(o)); break; + case VARCHAR: + result = new HiveDecimal(getString(o, oi)); + break; case TIMESTAMP: Double ts = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o) .getDouble(); @@ -986,6 +1048,15 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) { result = null; } break; + case VARCHAR: { + try { + String val = getString(o, oi).trim(); + result = Date.valueOf(val); + } catch (IllegalArgumentException e) { + result = null; + } + break; + } case DATE: result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o).get(); break; @@ -1041,6 +1112,9 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi) { String s = soi.getPrimitiveJavaObject(o); result = getTimestampFromString(s); break; + case VARCHAR: + result = getTimestampFromString(getString(o, oi)); + break; case DATE: result = new Timestamp( ((DateObjectInspector) oi).getPrimitiveWritableObject(o).get().getTime()); @@ -1109,6 +1183,7 @@ public static PrimitiveGrouping getPrimitiveGrouping(PrimitiveCategory primitive case DECIMAL: return PrimitiveGrouping.NUMERIC_GROUP; case STRING: + case VARCHAR: return PrimitiveGrouping.STRING_GROUP; case BOOLEAN: return PrimitiveGrouping.BOOLEAN_GROUP; diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/SettableHiveVarcharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/SettableHiveVarcharObjectInspector.java new file mode 100644 index 0000000..4f75f13 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/SettableHiveVarcharObjectInspector.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; + +public interface SettableHiveVarcharObjectInspector extends HiveVarcharObjectInspector { + Object set(Object o, HiveVarchar value); + + Object set(Object o, String value); + + Object create(HiveVarchar value); + +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantHiveVarcharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantHiveVarcharObjectInspector.java new file mode 100644 index 0000000..8d70dc5 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantHiveVarcharObjectInspector.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; + +/** + * A WritableConstantHiveVarcharObjectInspector is a WritableHiveVarcharObjectInspector + * that implements ConstantObjectInspector. + */ +public class WritableConstantHiveVarcharObjectInspector extends + WritableHiveVarcharObjectInspector implements + ConstantObjectInspector { + + protected HiveVarcharWritable value; + + WritableConstantHiveVarcharObjectInspector(HiveVarcharWritable value) { + this(value, null); + } + + WritableConstantHiveVarcharObjectInspector(HiveVarcharWritable value, + VarcharTypeParams typeParams) { + super(PrimitiveObjectInspectorUtils.varcharTypeEntry); + this.value = value; + + // If we have been provided with type params, then use them. + // Otherwise determine character length and update type params/typeinfo accordingly. 
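+ // A constant always carries its value, so a missing length can be derived from the value itself; the type entry fetched below then corresponds to that derived varchar(n) type.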
+ if (typeParams == null) { + typeParams = new VarcharTypeParams(); + typeParams.length = this.value.getCharacterLength(); + } + setTypeParams(typeParams); + this.typeEntry = PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs( + PrimitiveCategory.VARCHAR, + typeParams); + } + + @Override + public HiveVarcharWritable getWritableConstantValue() { + return value; + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java new file mode 100644 index 0000000..a8b5a94 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; + +public class WritableHiveVarcharObjectInspector + extends AbstractPrimitiveWritableObjectInspector + implements SettableHiveVarcharObjectInspector { + + private static final Log LOG = LogFactory.getLog(WritableHiveVarcharObjectInspector.class); + + public WritableHiveVarcharObjectInspector(PrimitiveTypeEntry typeEntry) { + super(typeEntry); + if (typeEntry.primitiveCategory != PrimitiveCategory.VARCHAR) { + throw new RuntimeException( + "TypeEntry of type varchar expected, got " + typeEntry.primitiveCategory); + } + } + + @Override + public HiveVarchar getPrimitiveJavaObject(Object o) { + // Check the input object's length; if it doesn't match, + // return a new primitive with the correct params. + if (o == null) { + return null; + } + HiveVarcharWritable writable = (HiveVarcharWritable) o; + if (doesWritableMatchTypeParams(writable)) { + return writable.getHiveVarchar(); + } + return getPrimitiveWithParams(writable); + } + + @Override + public HiveVarcharWritable getPrimitiveWritableObject(Object o) { + // Check the input object's length; if it doesn't match, + // return a new writable with the correct params. 
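+ // A writable that already fits the declared maximum length is returned as-is; only oversized values pay for a truncating copy.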
+ if (o == null) { + return null; + } + HiveVarcharWritable writable = (HiveVarcharWritable) o; + if (doesWritableMatchTypeParams(writable)) { + return writable; + } + + return getWritableWithParams(writable); + } + + private HiveVarchar getPrimitiveWithParams(HiveVarcharWritable val) { + HiveVarchar hv = new HiveVarchar(); + hv.setValue(val.getHiveVarchar(), getMaxLength()); + return hv; + } + + private HiveVarcharWritable getWritableWithParams(HiveVarcharWritable val) { + HiveVarcharWritable newValue = new HiveVarcharWritable(); + newValue.set(val, getMaxLength()); + return newValue; + } + + private boolean doesWritableMatchTypeParams(HiveVarcharWritable writable) { + return ParameterizedPrimitiveTypeUtils.doesWritableMatchTypeParams( + writable, (VarcharTypeParams) typeParams); + } + + private boolean doesPrimitiveMatchTypeParams(HiveVarchar value) { + return ParameterizedPrimitiveTypeUtils.doesPrimitiveMatchTypeParams( + value, (VarcharTypeParams) typeParams); + } + + @Override + public Object copyObject(Object o) { + if (o == null) { + return null; + } + HiveVarcharWritable writable = (HiveVarcharWritable) o; + if (doesWritableMatchTypeParams(writable)) { + return new HiveVarcharWritable(writable); + } + return getWritableWithParams(writable); + } + + @Override + public Object set(Object o, HiveVarchar value) { + HiveVarcharWritable writable = (HiveVarcharWritable) o; + writable.set(value, getMaxLength()); + return o; + } + + @Override + public Object set(Object o, String value) { + HiveVarcharWritable writable = (HiveVarcharWritable) o; + writable.set(value, getMaxLength()); + return o; + } + + @Override + public Object create(HiveVarchar value) { + HiveVarcharWritable ret; + ret = new HiveVarcharWritable(); + ret.set(value, getMaxLength()); + return ret; + } + + public int getMaxLength() { + return typeParams != null ? 
((VarcharTypeParams) typeParams).length : -1; + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/ParameterizedPrimitiveTypeUtils.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/ParameterizedPrimitiveTypeUtils.java index 9dcf4cc..8a48349 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/ParameterizedPrimitiveTypeUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/ParameterizedPrimitiveTypeUtils.java @@ -1,5 +1,9 @@ package org.apache.hadoop.hive.serde2.typeinfo; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; @@ -40,4 +44,28 @@ public static BaseTypeParams getTypeParamsFromPrimitiveObjectInspector( return oi.getTypeParams(); } + /** + * Utils for varchar type + */ + public static class HiveVarcharSerDeHelper { + public int maxLength; + public HiveVarcharWritable writable = new HiveVarcharWritable(); + + public HiveVarcharSerDeHelper(VarcharTypeParams typeParams) { + if (typeParams == null) { + throw new RuntimeException("varchar type used without type params"); + } + maxLength = typeParams.getLength(); + } + } + + public static boolean doesWritableMatchTypeParams(HiveVarcharWritable writable, + VarcharTypeParams typeParams) { + return (typeParams == null || typeParams.length >= writable.getCharacterLength()); + } + + public static boolean doesPrimitiveMatchTypeParams(HiveVarchar value, + VarcharTypeParams typeParams) { + return (typeParams == null || typeParams.length == value.getCharacterLength()); + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfo.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfo.java index 55c1069..36a7008 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfo.java +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfo.java @@ -49,6 +49,15 @@ protected TypeInfo() { */ public abstract String getTypeName(); + /** + * String representing the qualified type name. + * Qualified types should override this method. + * @return + */ + public String getQualifiedName() { + return getTypeName(); + } + @Override public String toString() { return getTypeName(); diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java index 48fbeae..05dbb81 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java @@ -25,6 +25,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; @@ -67,6 +68,16 @@ public static TypeInfo getPrimitiveTypeInfo(String typeName) { } } else { // No type params + + // Prevent creation of varchar TypeInfo with no length specification. 
+ // This can happen if an old-style UDF uses a varchar type either as an + // argument or return type in an evaluate() function, or other instances + // of using reflection-based methods for retrieving a TypeInfo. + if (typeEntry.primitiveCategory == PrimitiveCategory.VARCHAR) { + LOG.error("varchar type used with no type params"); + throw new RuntimeException("varchar type used with no type params"); + } + result = new PrimitiveTypeInfo(parts.typeName); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java index 3d1c50f..0ae4331 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java @@ -29,6 +29,8 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; @@ -736,4 +738,24 @@ public static boolean isConversionRequiredForComparison(TypeInfo typeA, TypeInfo } return true; } + + /** + * Return the character length of the type + * @param typeInfo + * @return + */ + public static int getCharacterLengthForType(PrimitiveTypeInfo typeInfo) { + switch (typeInfo.getPrimitiveCategory()) { + case STRING: + return HiveVarchar.MAX_VARCHAR_LENGTH; + case VARCHAR: + VarcharTypeParams varcharParams = (VarcharTypeParams) typeInfo.getTypeParams(); + if (varcharParams == null) { + throw new RuntimeException("varchar type used without type params"); + } + return varcharParams.getLength(); + default: + return 0; + } + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeParams.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeParams.java new file mode 100644 index 0000000..c602918 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeParams.java @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.typeinfo; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.io.Serializable; + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.io.WritableUtils; + +public class VarcharTypeParams extends BaseTypeParams implements Serializable { + private static final long serialVersionUID = 1L; + + public int length; + + @Override + public void validateParams() throws SerDeException { + if (length < 1) { + throw new SerDeException("VARCHAR length must be positive"); + } + if (length > HiveVarchar.MAX_VARCHAR_LENGTH) { + throw new SerDeException("Length " + length + + " exceeds max varchar length of " + HiveVarchar.MAX_VARCHAR_LENGTH); + } + } + + @Override + public void populateParams(String[] params) throws SerDeException { + if (params.length != 1) { + throw new SerDeException("Invalid number of parameters for VARCHAR"); + } + try { + length = Integer.parseInt(params[0]); + } catch (NumberFormatException err) { + throw new SerDeException("Error setting VARCHAR length: " + err, err); + } + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("("); + sb.append(length); + sb.append(")"); + return sb.toString(); + } + + @Override + public void readFields(DataInput in) throws IOException { + length = WritableUtils.readVInt(in); + try { + validateParams(); + } catch (SerDeException err) { + throw new IOException(err); + } + } + + @Override + public void write(DataOutput out) throws IOException { + WritableUtils.writeVInt(out, length); + } + + public int getLength() { + return length; + } + + public void setLength(int len) { + length = len; + } + + @Override + public boolean hasCharacterMaximumLength() { + return true; + } + @Override + public int getCharacterMaximumLength() { + return length; + } +} diff --git serde/src/test/org/apache/hadoop/hive/serde2/typeinfo/TestTypeInfoUtils.java serde/src/test/org/apache/hadoop/hive/serde2/typeinfo/TestTypeInfoUtils.java new file mode 100644 index 0000000..798987c --- /dev/null +++ serde/src/test/org/apache/hadoop/hive/serde2/typeinfo/TestTypeInfoUtils.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.serde2.typeinfo; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +public class TestTypeInfoUtils extends TestCase { + + static void parseTypeString(String typeString, boolean exceptionExpected) { + boolean caughtException = false; + try { + TypeInfoUtils.getTypeInfoFromTypeString(typeString); + } catch (IllegalArgumentException err) { + caughtException = true; + } + assertEquals("parsing typestring " + typeString, exceptionExpected, caughtException); + } + + public void testTypeInfoParser() { + String[] validTypeStrings = { + "int", + "string", + "varchar(10)", + "array<int>" + }; + + String[] invalidTypeStrings = { + "array<", + "varchar(123", + "varchar(123,", + "varchar()", + "varchar(" + }; + + for (String typeString : validTypeStrings) { + parseTypeString(typeString, false); + } + for (String typeString : invalidTypeStrings) { + parseTypeString(typeString, true); + } + } + + public void testVarcharNoParams() { + boolean caughtException = false; + try { + TypeInfoUtils.getTypeInfoFromTypeString("varchar"); + } catch (Exception err) { + caughtException = true; + } + assertEquals("varchar TypeInfo with no params should fail", true, caughtException); + } +}
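A minimal sketch of the varchar semantics the patch establishes (illustrative values chosen for this note, not taken from the patch; HiveVarchar and TypeInfoUtils as defined in this change):

    // Type strings must carry a length parameter, as TestTypeInfoUtils verifies:
    TypeInfo ok = TypeInfoUtils.getTypeInfoFromTypeString("varchar(10)");  // parses
    // TypeInfoUtils.getTypeInfoFromTypeString("varchar");                 // throws RuntimeException

    // Values are truncated to the declared maximum length, counted in Unicode code points:
    HiveVarchar hv = new HiveVarchar("hello world", 5);
    assert "hello".equals(hv.getValue());
    assert hv.getCharacterLength() == 5;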