diff --git common/src/java/org/apache/hadoop/hive/common/type/HiveBaseChar.java common/src/java/org/apache/hadoop/hive/common/type/HiveBaseChar.java
index b230410..f0c9d66 100644
--- common/src/java/org/apache/hadoop/hive/common/type/HiveBaseChar.java
+++ common/src/java/org/apache/hadoop/hive/common/type/HiveBaseChar.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.common.type;
 
+import org.apache.commons.lang.StringUtils;
+
@@ -53,7 +55,7 @@ public static String enforceMaxLength(String val, int maxLength) {
     if (maxLength > 0) {
       int valLength = val.codePointCount(0, val.length());
       if (valLength > maxLength) {
-        // Truncate the excess trailing spaces to fit the character length.
+        // Truncate the excess chars to fit the character length.
         // Also make sure we take supplementary chars into account.
         value = val.substring(0, val.offsetByCodePoints(0, maxLength));
       }
@@ -61,6 +63,25 @@ public static String enforceMaxLength(String val, int maxLength) {
     return value;
   }
 
+  public static String getPaddedValue(String val, int maxLength) {
+    if (maxLength < 0) {
+      return val;
+    }
+
+    int valLength = val.codePointCount(0, val.length());
+    if (valLength > maxLength) {
+      return enforceMaxLength(val, maxLength);
+    }
+
+    if (maxLength > valLength) {
+      // Make sure we pad the right amount of spaces; valLength is in terms of code points,
+      // while StringUtils.rightPad() is based on the number of java chars.
+      int padLength = val.length() + (maxLength - valLength);
+      val = StringUtils.rightPad(val, padLength);
+    }
+    return val;
+  }
+
   public String getValue() {
     return value;
   }
@@ -71,4 +92,14 @@ public int getCharacterLength() {
     }
     return characterLength;
   }
+
+  @Override
+  public int hashCode() {
+    return getValue().hashCode();
+  }
+
+  @Override
+  public String toString() {
+    return getValue();
+  }
 }
diff --git common/src/java/org/apache/hadoop/hive/common/type/HiveChar.java common/src/java/org/apache/hadoop/hive/common/type/HiveChar.java
new file mode 100644
index 0000000..796288c
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/type/HiveChar.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import org.apache.commons.lang.StringUtils;
+
+public class HiveChar extends HiveBaseChar
+  implements Comparable<HiveChar> {
+
+  public static final int MAX_CHAR_LENGTH = 255;
+
+  // String value is stripped of trailing spaces. This keeps track of the padded length.
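+  // Illustrative example (not part of the patch): new HiveChar("abc  ", 10) stores
+  // the stripped value "abc", and getPaddedValue() rebuilds "abc" plus 7 trailing spaces.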
+  protected int maxLength = -1;
+
+  public HiveChar() {
+  }
+
+  public HiveChar(String val, int len) {
+    setValue(val, len);
+  }
+
+  public HiveChar(HiveChar hc, int len) {
+    setValue(hc.getStrippedValue(), len);
+  }
+
+  public void setValue(String val, int len) {
+    super.setValue(StringUtils.stripEnd(val, " "), len);
+    maxLength = len;
+  }
+
+  public void setValue(String val) {
+    setValue(val, maxLength);
+  }
+
+  public int getMaxLength() {
+    return maxLength;
+  }
+
+  public void setMaxLength(int maxLength) {
+    this.maxLength = maxLength;
+  }
+
+  public String getStrippedValue() {
+    return value;
+  }
+
+  public String getPaddedValue() {
+    return getPaddedValue(getStrippedValue(), maxLength);
+  }
+
+  public String toString() {
+    return getPaddedValue();
+  }
+
+  public int compareTo(HiveChar rhs) {
+    if (rhs == this) {
+      return 0;
+    }
+    return this.getStrippedValue().compareTo(rhs.getStrippedValue());
+  }
+
+  public boolean equals(Object rhs) {
+    if (rhs == this) {
+      return true;
+    }
+    if (rhs == null || rhs.getClass() != getClass()) {
+      return false;
+    }
+    return this.getStrippedValue().equals(((HiveChar) rhs).getStrippedValue());
+  }
+}
diff --git common/src/java/org/apache/hadoop/hive/common/type/HiveVarchar.java common/src/java/org/apache/hadoop/hive/common/type/HiveVarchar.java
index 36c6879..969d474 100644
--- common/src/java/org/apache/hadoop/hive/common/type/HiveVarchar.java
+++ common/src/java/org/apache/hadoop/hive/common/type/HiveVarchar.java
@@ -51,11 +51,6 @@ public void setValue(HiveVarchar hc) {
     super.setValue(hc.getValue(), -1);
   }
 
-  @Override
-  public String toString() {
-    return getValue();
-  }
-
   public int compareTo(HiveVarchar rhs) {
     if (rhs == this) {
       return 0;
     }
@@ -69,9 +64,4 @@ public boolean equals(HiveVarchar rhs) {
     }
     return this.getValue().equals(rhs.getValue());
   }
-
-  @Override
-  public int hashCode() {
-    return getValue().hashCode();
-  }
 }
diff --git common/src/java/org/apache/hive/common/util/HiveStringUtils.java common/src/java/org/apache/hive/common/util/HiveStringUtils.java
index fa995cd..c21c937 100644
--- common/src/java/org/apache/hive/common/util/HiveStringUtils.java
+++ common/src/java/org/apache/hive/common/util/HiveStringUtils.java
@@ -39,6 +39,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience;
 import org.apache.hadoop.hive.common.classification.InterfaceStability;
+import org.apache.hadoop.io.Text;
 
 /**
  * HiveStringUtils
@@ -807,4 +808,25 @@ public static String camelize(String s) {
     return sb.toString();
   }
 
+  /**
+   * Checks if b is the first byte of a UTF-8 character.
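+   * In UTF-8, continuation bytes always carry the bit pattern 10xxxxxx, so a
+   * byte starts a new code point exactly when its top two bits are not 10;
+   * that is the (b &amp; 0xC0) != 0x80 test below.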
+ * + */ + public static boolean isUtfStartByte(byte b) { + return (b & 0xC0) != 0x80; + } + + public static int getTextUtfLength(Text t) { + byte[] data = t.getBytes(); + int len = 0; + for (int i = 0; i < t.getLength(); i++) { + if (isUtfStartByte(data[i])) { + len++; + } + } + return len; + } } diff --git common/src/test/org/apache/hadoop/hive/common/type/TestHiveBaseChar.java common/src/test/org/apache/hadoop/hive/common/type/TestHiveBaseChar.java new file mode 100644 index 0000000..2f24c62 --- /dev/null +++ common/src/test/org/apache/hadoop/hive/common/type/TestHiveBaseChar.java @@ -0,0 +1,82 @@ +package org.apache.hadoop.hive.common.type; + +import java.util.Random; + +import junit.framework.TestCase; + +public class TestHiveBaseChar extends TestCase { + static Random rnd = new Random(); + + public static int getRandomSupplementaryChar() { + int lowSurrogate = 0xDC00 + rnd.nextInt(1024); + //return 0xD8000000 + lowSurrogate; + int highSurrogate = 0xD800; + return Character.toCodePoint((char)highSurrogate, (char)lowSurrogate); + } + + public static int getRandomCodePoint() { + int codePoint; + if (rnd.nextDouble() < 0.50) { + codePoint = 32 + rnd.nextInt(90); + } else { + codePoint = getRandomSupplementaryChar(); + } + if (!Character.isValidCodePoint(codePoint)) { + System.out.println(Integer.toHexString(codePoint) + " is not a valid code point"); + } + return codePoint; + } + + public static int getRandomCodePoint(int excludeChar) { + while (true) { + int codePoint = getRandomCodePoint(); + if (codePoint != excludeChar) { + return codePoint; + } + } + } + + public static String createRandomSupplementaryCharString(int len) { + StringBuffer sb = new StringBuffer(); + for (int idx = 0; idx < len; ++idx) { + sb.appendCodePoint(getRandomCodePoint(' ')); + } + return sb.toString(); + } + + public void testStringLength() throws Exception { + int strLen = 20; + int[] lengths = { 15, 20, 25 }; + // Try with supplementary characters + for (int idx1 = 0; idx1 < lengths.length; ++idx1) { + // Create random test string + int curLen = lengths[idx1]; + String testString = createRandomSupplementaryCharString(curLen); + assertEquals(curLen, testString.codePointCount(0, testString.length())); + String enforcedString = HiveBaseChar.enforceMaxLength(testString, strLen); + if (curLen <= strLen) { + // No truncation needed + assertEquals(testString, enforcedString); + } else { + // String should have been truncated. 
+        assertEquals(strLen, enforcedString.codePointCount(0, enforcedString.length()));
+      }
+    }
+  }
+
+  public void testGetPaddedValue() {
+    int strLen = 20;
+    int[] lengths = { 15, 20, 25 };
+    for (int idx1 = 0; idx1 < lengths.length; ++idx1) {
+      int curLen = lengths[idx1];
+      // Random test string
+      String testString = createRandomSupplementaryCharString(curLen);
+      assertEquals(curLen, testString.codePointCount(0, testString.length()));
+      String paddedString = HiveBaseChar.getPaddedValue(testString, strLen);
+      assertEquals(strLen, paddedString.codePointCount(0, paddedString.length()));
+    }
+
+    assertEquals("abc       ", HiveBaseChar.getPaddedValue("abc", 10));
+    assertEquals("abc       ", HiveBaseChar.getPaddedValue("abc ", 10));
+  }
+}
diff --git common/src/test/org/apache/hadoop/hive/common/type/TestHiveChar.java common/src/test/org/apache/hadoop/hive/common/type/TestHiveChar.java
new file mode 100644
index 0000000..5f91f33
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/type/TestHiveChar.java
@@ -0,0 +1,77 @@
+package org.apache.hadoop.hive.common.type;
+
+import junit.framework.TestCase;
+
+public class TestHiveChar extends TestCase {
+
+  public void testBasic() {
+    HiveChar hc = new HiveChar("abc", 10);
+    assertEquals("abc       ", hc.toString());
+    assertEquals("abc", hc.getStrippedValue());
+
+    hc.setValue("abc123");
+    assertEquals("abc123    ", hc.toString());
+    assertEquals("abc123", hc.getStrippedValue());
+
+    hc.setValue("xyz", 15);
+    assertEquals("xyz            ", hc.toString());
+    assertEquals("xyz", hc.getStrippedValue());
+
+    // initial value is stripped of trailing spaces
+    hc.setValue("abc ", 5);
+    assertEquals("abc", hc.getStrippedValue());
+    assertEquals(3, hc.getCharacterLength());
+  }
+
+  public void testStringLength() {
+    HiveChar hc = new HiveChar();
+
+    hc.setValue("0123456789", 5);
+    assertEquals("01234", hc.toString());
+
+    hc.setValue("0123456789", 10);
+    assertEquals("0123456789", hc.toString());
+
+    hc.setValue("0123456789", 15);
+    assertEquals("0123456789     ", hc.toString());
+  }
+
+  public void testComparison() {
+    HiveChar hc1 = new HiveChar();
+    HiveChar hc2 = new HiveChar();
+
+    // Identical strings
+    hc1.setValue("abc", 3);
+    hc2.setValue("abc", 3);
+    assertEquals(hc1, hc2);
+    assertEquals(hc2, hc1);
+    assertEquals(0, hc1.compareTo(hc2));
+    assertEquals(0, hc2.compareTo(hc1));
+
+    // Unequal strings
+    hc1.setValue("abc", 3);
+    hc2.setValue("123", 3);
+    assertFalse(hc1.equals(hc2));
+    assertFalse(hc2.equals(hc1));
+    assertFalse(0 == hc1.compareTo(hc2));
+    assertFalse(0 == hc2.compareTo(hc1));
+
+    // Trailing spaces are not significant
+    hc1.setValue("abc", 3);
+    hc2.setValue("abc", 5);
+    assertEquals("abc", hc1.toString());
+    assertEquals("abc  ", hc2.toString());
+    assertEquals(hc1, hc2);
+    assertEquals(hc2, hc1);
+    assertEquals(0, hc1.compareTo(hc2));
+    assertEquals(0, hc2.compareTo(hc1));
+
+    // Leading space is significant
+    hc1.setValue(" abc", 3);
+    hc2.setValue("abc", 3);
+    assertFalse(hc1.equals(hc2));
+    assertFalse(hc2.equals(hc1));
+    assertFalse(0 == hc1.compareTo(hc2));
+    assertFalse(0 == hc2.compareTo(hc1));
+  }
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 898b6a5..7b94986 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -360,6 +360,8 @@
     GenericUDFToDecimal.class);
     registerGenericUDF(serdeConstants.VARCHAR_TYPE_NAME,
         GenericUDFToVarchar.class);
+
registerGenericUDF(serdeConstants.CHAR_TYPE_NAME, + GenericUDFToChar.class); // Aggregate functions registerGenericUDAF("max", new GenericUDAFMax()); @@ -657,9 +659,15 @@ public static TypeInfo getTypeInfoForPrimitiveCategory( PrimitiveTypeInfo a, PrimitiveTypeInfo b, PrimitiveCategory typeCategory) { // For types with parameters (like varchar), we need to determine the type parameters // that should be added to this type, based on the original 2 TypeInfos. + int maxLength; switch (typeCategory) { + case CHAR: + maxLength = getCommonLength( + TypeInfoUtils.getCharacterLengthForType(a), + TypeInfoUtils.getCharacterLengthForType(b)); + return TypeInfoFactory.getCharTypeInfo(maxLength); case VARCHAR: - int maxLength = getCommonLength( + maxLength = getCommonLength( TypeInfoUtils.getCharacterLengthForType(a), TypeInfoUtils.getCharacterLengthForType(b)); return TypeInfoFactory.getVarcharTypeInfo(maxLength); @@ -1500,7 +1508,7 @@ private static boolean isOpCast(ExprNodeDesc desc) { udfClass == UDFToDouble.class || udfClass == UDFToFloat.class || udfClass == UDFToInteger.class || udfClass == UDFToLong.class || udfClass == UDFToShort.class || udfClass == UDFToString.class || - udfClass == GenericUDFToVarchar.class || + udfClass == GenericUDFToVarchar.class || udfClass == GenericUDFToChar.class || udfClass == GenericUDFTimestamp.class || udfClass == GenericUDFToBinary.class || udfClass == GenericUDFToDate.class || udfClass == GenericUDFToDecimal.class; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index b599d62..7443ea4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -127,6 +127,10 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.mapred.InputFormat; @@ -150,6 +154,7 @@ TokenToTypeName.put(HiveParser.TOK_FLOAT, serdeConstants.FLOAT_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_DOUBLE, serdeConstants.DOUBLE_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_STRING, serdeConstants.STRING_TYPE_NAME); + TokenToTypeName.put(HiveParser.TOK_CHAR, serdeConstants.CHAR_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_VARCHAR, serdeConstants.VARCHAR_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_BINARY, serdeConstants.BINARY_TYPE_NAME); TokenToTypeName.put(HiveParser.TOK_DATE, serdeConstants.DATE_TYPE_NAME); @@ -168,6 +173,10 @@ public static String getTypeName(ASTNode node) throws SemanticException { } switch (token) { + case HiveParser.TOK_CHAR: + CharTypeInfo charTypeInfo = ParseUtils.getCharTypeInfo(node); + typeName = charTypeInfo.getQualifiedName(); + break; case HiveParser.TOK_VARCHAR: VarcharTypeInfo varcharTypeInfo = ParseUtils.getVarcharTypeInfo(node); typeName = varcharTypeInfo.getQualifiedName(); diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g 
ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index ca667d4..366b714 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -105,6 +105,7 @@ KW_DATETIME: 'DATETIME'; KW_TIMESTAMP: 'TIMESTAMP'; KW_DECIMAL: 'DECIMAL'; KW_STRING: 'STRING'; +KW_CHAR: 'CHAR'; KW_VARCHAR: 'VARCHAR'; KW_ARRAY: 'ARRAY'; KW_STRUCT: 'STRUCT'; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index b8b84e5..2343a2c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -110,6 +110,7 @@ TOK_DATELITERAL; TOK_DATETIME; TOK_TIMESTAMP; TOK_STRING; +TOK_CHAR; TOK_VARCHAR; TOK_BINARY; TOK_DECIMAL; @@ -1781,6 +1782,7 @@ primitiveType | KW_BINARY -> TOK_BINARY | KW_DECIMAL (LPAREN prec=Number (COMMA scale=Number)? RPAREN)? -> ^(TOK_DECIMAL $prec? $scale?) | KW_VARCHAR LPAREN length=Number RPAREN -> ^(TOK_VARCHAR $length) + | KW_CHAR LPAREN length=Number RPAREN -> ^(TOK_CHAR $length) ; listType diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index 1ac0a2d..2c796e0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -26,6 +26,9 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.ErrorMsg; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -131,6 +134,16 @@ public static VarcharTypeInfo getVarcharTypeInfo(ASTNode node) return TypeInfoFactory.getVarcharTypeInfo(Integer.valueOf(lengthStr)); } + public static CharTypeInfo getCharTypeInfo(ASTNode node) + throws SemanticException { + if (node.getChildCount() != 1) { + throw new SemanticException("Bad params for type char"); + } + + String lengthStr = node.getChild(0).getText(); + return TypeInfoFactory.getCharTypeInfo(Integer.valueOf(lengthStr)); + } + static int getIndex(String[] list, String elem) { for(int i=0; i < list.length; i++) { if (list[i].toLowerCase().equals(elem)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index 47d7995..672af8d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -62,6 +62,8 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; @@ -573,6 +575,8 @@ public static ColumnExprProcessor getColumnExprProcessor() { 
serdeConstants.DOUBLE_TYPE_NAME); conversionFunctionTextHashMap.put(HiveParser.TOK_STRING, serdeConstants.STRING_TYPE_NAME); + conversionFunctionTextHashMap.put(HiveParser.TOK_CHAR, + serdeConstants.CHAR_TYPE_NAME); conversionFunctionTextHashMap.put(HiveParser.TOK_VARCHAR, serdeConstants.VARCHAR_TYPE_NAME); conversionFunctionTextHashMap.put(HiveParser.TOK_BINARY, @@ -796,8 +800,17 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, if (isFunction) { ASTNode funcNameNode = (ASTNode)expr.getChild(0); switch (funcNameNode.getType()) { + case HiveParser.TOK_CHAR: + // Add type params + CharTypeInfo charTypeInfo = + (CharTypeInfo) ParseUtils.getCharTypeInfo(funcNameNode); + if (genericUDF != null) { + ((SettableUDF)genericUDF).setTypeInfo(charTypeInfo); + } + break; case HiveParser.TOK_VARCHAR: - VarcharTypeInfo varcharTypeInfo = ParseUtils.getVarcharTypeInfo(funcNameNode); + VarcharTypeInfo varcharTypeInfo = + (VarcharTypeInfo) ParseUtils.getVarcharTypeInfo(funcNameNode); if (genericUDF != null) { ((SettableUDF)genericUDF).setTypeInfo(varcharTypeInfo); } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java index 3e3fe33..7348478 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java @@ -83,6 +83,7 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) case DOUBLE: return new GenericUDAFDoubleStatsEvaluator(); case STRING: + case CHAR: case VARCHAR: return new GenericUDAFStringStatsEvaluator(); case BINARY: diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java index af58b38..d4d7e7c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.udf.generic; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -33,8 +34,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.BytesWritable; /** @@ -62,7 +63,9 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen // Loop through all the inputs to determine the appropriate return type/length. 
// Return type: + // All CHAR inputs: return CHAR // All VARCHAR inputs: return VARCHAR + // All CHAR/VARCHAR inputs: return VARCHAR // All BINARY inputs: return BINARY // Otherwise return STRING argumentOIs = arguments; @@ -88,10 +91,14 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen returnType = PrimitiveCategory.STRING; } break; + case CHAR: case VARCHAR: if (!fixedLengthReturnValue) { returnType = PrimitiveCategory.STRING; } + if (fixedLengthReturnValue && currentCategory == PrimitiveCategory.VARCHAR) { + returnType = PrimitiveCategory.VARCHAR; + } break; default: returnType = PrimitiveCategory.STRING; @@ -104,8 +111,10 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen // max length for the char/varchar, then the return type reverts to string. if (fixedLengthReturnValue) { returnLength += GenericUDFUtils.StringHelper.getFixedStringSizeForType(poi); - if (returnType == PrimitiveCategory.VARCHAR - && returnLength > HiveVarchar.MAX_VARCHAR_LENGTH) { + if ((returnType == PrimitiveCategory.VARCHAR + && returnLength > HiveVarchar.MAX_VARCHAR_LENGTH) + || (returnType == PrimitiveCategory.CHAR + && returnLength > HiveChar.MAX_CHAR_LENGTH)) { returnType = PrimitiveCategory.STRING; fixedLengthReturnValue = false; } @@ -119,11 +128,15 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen // treat all inputs as string, the return value will be converted to the appropriate type. createStringConverters(); returnHelper = new GenericUDFUtils.StringHelper(returnType); + BaseCharTypeInfo typeInfo; switch (returnType) { case STRING: return PrimitiveObjectInspectorFactory.writableStringObjectInspector; + case CHAR: + typeInfo = TypeInfoFactory.getCharTypeInfo(returnLength); + return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo); case VARCHAR: - VarcharTypeInfo typeInfo = TypeInfoFactory.getVarcharTypeInfo(returnLength); + typeInfo = TypeInfoFactory.getVarcharTypeInfo(returnLength); return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo); default: throw new UDFArgumentException("Unexpected CONCAT return type of " + returnType); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java index ecb6939..d0222ac 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java @@ -31,8 +31,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; /** * UDFLower. @@ -65,11 +65,19 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI); PrimitiveCategory inputType = argumentOI.getPrimitiveCategory(); ObjectInspector outputOI = null; + BaseCharTypeInfo typeInfo; switch (inputType) { + case CHAR: + // return type should have same length as the input. 
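+        // (assumes case conversion preserves string length, so the output char type
+        // can reuse the input's declared length)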
+      returnType = inputType;
+      typeInfo = TypeInfoFactory.getCharTypeInfo(
+          GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI));
+      outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
+      break;
     case VARCHAR:
       // return type should have same length as the input.
       returnType = inputType;
-      VarcharTypeInfo typeInfo = TypeInfoFactory.getVarcharTypeInfo(
+      typeInfo = TypeInfoFactory.getVarcharTypeInfo(
           GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI));
       outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
       break;
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java
new file mode 100644
index 0000000..83e36a5
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToChar.java
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.Serializable;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.SettableUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.HiveCharConverter;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+
+@Description(name = "char",
+value = "CAST(<value> as CHAR(length)) - Converts the argument to a char value.",
+extended = "Values will be truncated if the input value is too long to fit"
++ " within the char length.\n"
++ "Example:\n "
++ "  > SELECT CAST(1234 AS char(10)) FROM src LIMIT 1;\n"
++ "  '1234'")
+public class GenericUDFToChar extends GenericUDF implements SettableUDF, Serializable {
+  private static final Log LOG = LogFactory.getLog(GenericUDFToChar.class.getName());
+
+  private transient PrimitiveObjectInspector argumentOI;
+  private transient HiveCharConverter converter;
+
+  // The char type info needs to be set prior to initialization,
+  // and must be preserved when the plan is serialized to other processes.
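+  // The length is supplied through SettableUDF.setTypeInfo() at compile time:
+  // TypeCheckProcFactory passes the CharTypeInfo parsed from CAST(... AS CHAR(n)).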
+  private CharTypeInfo typeInfo;
+
+  public GenericUDFToChar() {
+  }
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
+    if (arguments.length != 1) {
+      throw new UDFArgumentException("CHAR cast requires a value argument");
+    }
+    try {
+      argumentOI = (PrimitiveObjectInspector) arguments[0];
+    } catch (ClassCastException e) {
+      throw new UDFArgumentException(
+          "The function CHAR takes only primitive types");
+    }
+
+    // Check if this UDF has been provided with type params for the output char type
+    SettableHiveCharObjectInspector outputOI;
+    outputOI = (SettableHiveCharObjectInspector)
+        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(typeInfo);
+
+    converter = new HiveCharConverter(argumentOI, outputOI);
+    return outputOI;
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    Object o0 = arguments[0].get();
+    if (o0 == null) {
+      return null;
+    }
+
+    return converter.convert(o0);
+  }
+
+  @Override
+  public String getDisplayString(String[] children) {
+    assert (children.length == 1);
+    StringBuilder sb = new StringBuilder();
+    sb.append("CAST( ");
+    sb.append(children[0]);
+    sb.append(" AS CHAR(");
+    sb.append(typeInfo.getLength());
+    // close both the CHAR( and the CAST( parens
+    sb.append("))");
+    return sb.toString();
+  }
+
+  /**
+   * Provide char type parameters for the output object inspector.
+   * This should be done before the UDF is initialized.
+   */
+  @Override
+  public void setTypeInfo(TypeInfo typeInfo) throws UDFArgumentException {
+    this.typeInfo = (CharTypeInfo) typeInfo;
+  }
+
+  @Override
+  public TypeInfo getTypeInfo() {
+    return typeInfo;
+  }
+
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUpper.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUpper.java
index f0e2d9f..c418db2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUpper.java
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUpper.java
@@ -31,6 +31,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 
@@ -65,14 +66,23 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen
     stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI);
     PrimitiveCategory inputType = argumentOI.getPrimitiveCategory();
     ObjectInspector outputOI = null;
+    BaseCharTypeInfo typeInfo;
     switch (inputType) {
+      case CHAR:
+        // return type should have same length as the input.
+        returnType = inputType;
+        typeInfo = TypeInfoFactory.getCharTypeInfo(
+            GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI));
+        outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+            typeInfo);
+        break;
       case VARCHAR:
        // return type should have same length as the input.
returnType = inputType; - VarcharTypeInfo varcharTypeInfo = TypeInfoFactory.getVarcharTypeInfo( + typeInfo = TypeInfoFactory.getVarcharTypeInfo( GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI)); outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - varcharTypeInfo); + typeInfo); break; default: returnType = PrimitiveCategory.STRING; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java index b390f97..689ecfb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; @@ -41,9 +42,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.Text; /** @@ -367,6 +368,9 @@ public StringHelper(PrimitiveCategory type) throws UDFArgumentException { case STRING: returnValue = new Text(); break; + case CHAR: + returnValue = new HiveCharWritable(); + break; case VARCHAR: returnValue = new HiveVarcharWritable(); break; @@ -383,6 +387,9 @@ public Object setReturnValue(String val) throws UDFArgumentException { case STRING: ((Text)returnValue).set(val); return returnValue; + case CHAR: + ((HiveCharWritable) returnValue).set(val); + return returnValue; case VARCHAR: ((HiveVarcharWritable)returnValue).set(val); return returnValue; @@ -402,8 +409,9 @@ public static int getFixedStringSizeForType(PrimitiveObjectInspector poi) throws UDFArgumentException { // TODO: we can support date, int, .. 
any types which would have a fixed length value switch (poi.getPrimitiveCategory()) { + case CHAR: case VARCHAR: - VarcharTypeInfo typeInfo = (VarcharTypeInfo) poi.getTypeInfo(); + BaseCharTypeInfo typeInfo = (BaseCharTypeInfo) poi.getTypeInfo(); return typeInfo.getLength(); default: throw new UDFArgumentException("No fixed size for type " + poi.getTypeName()); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java index 9ecac2e..f1267d8 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java @@ -62,6 +62,8 @@ public void typeaffinity2(DoubleWritable x) {} TypeInfo varchar5; TypeInfo varchar10; TypeInfo maxVarchar; + TypeInfo char5; + TypeInfo char10; @Override protected void setUp() { @@ -69,6 +71,8 @@ protected void setUp() { maxVarchar = TypeInfoFactory.getPrimitiveTypeInfo(maxVarcharTypeName); varchar10 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)"); varchar5 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(5)"); + char10 = TypeInfoFactory.getPrimitiveTypeInfo("char(10)"); + char5 = TypeInfoFactory.getPrimitiveTypeInfo("char(5)"); } private void implicit(TypeInfo a, TypeInfo b, boolean convertible) { @@ -95,6 +99,13 @@ public void testImplicitConversion() { implicit(TypeInfoFactory.stringTypeInfo, varchar20, true); implicit(varchar20, varchar10, true); + implicit(char10, TypeInfoFactory.stringTypeInfo, true); + implicit(TypeInfoFactory.stringTypeInfo, char10, true); + implicit(char5, char10, true); + implicit(char5, varchar10, true); + implicit(varchar5, char10, true); + + implicit(TypeInfoFactory.intTypeInfo, char10, true); implicit(TypeInfoFactory.intTypeInfo, varchar10, true); implicit(TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo, true); } @@ -137,6 +148,8 @@ public void testTypeAffinity() { // String type affinity typeAffinity("typeaffinity1", TypeInfoFactory.stringTypeInfo, 1, Text.class); + typeAffinity("typeaffinity1", char5, 1, Text.class); + typeAffinity("typeaffinity1", varchar5, 1, Text.class); // Type affinity does not help when multiple methods have the same type affinity. typeAffinity("typeaffinity2", TypeInfoFactory.shortTypeInfo, 2, null); @@ -191,8 +204,9 @@ public void testGetMethodInternal() { verify(TestUDF.class, "one", TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, IntWritable.class, IntWritable.class, false); - // Passing varchar arguments should prefer the version of evaluate() with Text args. + // Passing char/varchar arguments should prefer the version of evaluate() with Text args. verify(TestUDF.class, "same", varchar5, varchar10, Text.class, Text.class, false); + verify(TestUDF.class, "same", char5, char10, Text.class, Text.class, false); verify(TestUDF.class, "mismatch", TypeInfoFactory.voidTypeInfo, TypeInfoFactory.intTypeInfo, null, null, true); @@ -214,6 +228,10 @@ public void testCommonClass() { common(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo); common(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); + common(TypeInfoFactory.stringTypeInfo, char10, TypeInfoFactory.stringTypeInfo); + common(char10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); + // common class between char/varchar is string? 
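+    // Mixed char/varchar has no common parameterized supertype, so it widens to string.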
+    common(char5, varchar10, TypeInfoFactory.stringTypeInfo);
   }
 
   private void comparison(TypeInfo a, TypeInfo b, TypeInfo result) {
@@ -238,6 +256,11 @@ public void testCommonClassComparison() {
     comparison(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo);
     comparison(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
     comparison(varchar5, varchar10, varchar10);
+    comparison(TypeInfoFactory.stringTypeInfo, char10, TypeInfoFactory.stringTypeInfo);
+    comparison(char10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
+    comparison(char5, char10, char10);
+    // common comparison class for char/varchar is string
+    comparison(char10, varchar5, TypeInfoFactory.stringTypeInfo);
   }
 
   /**
@@ -304,6 +327,14 @@ public void testCommonClassUnionAll() {
     unionAll(varchar10, varchar5, varchar10);
     unionAll(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
     unionAll(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo);
+
+    unionAll(char5, char10, char10);
+    unionAll(char10, char5, char10);
+    unionAll(char10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo);
+    unionAll(TypeInfoFactory.stringTypeInfo, char10, TypeInfoFactory.stringTypeInfo);
+
+    // common class for char/varchar is string
+    unionAll(char10, varchar5, TypeInfoFactory.stringTypeInfo);
   }
 
   public void testGetTypeInfoForPrimitiveCategory() {
@@ -314,6 +345,14 @@ public void testGetTypeInfoForPrimitiveCategory() {
     assertEquals(varchar10, FunctionRegistry.getTypeInfoForPrimitiveCategory(
         (PrimitiveTypeInfo) varchar10, (PrimitiveTypeInfo) varchar5, PrimitiveCategory.VARCHAR));
 
+    assertEquals(char10, FunctionRegistry.getTypeInfoForPrimitiveCategory(
+        (PrimitiveTypeInfo) char5, (PrimitiveTypeInfo) char10, PrimitiveCategory.CHAR));
+    assertEquals(char10, FunctionRegistry.getTypeInfoForPrimitiveCategory(
+        (PrimitiveTypeInfo) char10, (PrimitiveTypeInfo) char5, PrimitiveCategory.CHAR));
+
+    assertEquals(varchar10, FunctionRegistry.getTypeInfoForPrimitiveCategory(
+        (PrimitiveTypeInfo) varchar5, (PrimitiveTypeInfo) char10, PrimitiveCategory.VARCHAR));
+
     // non-qualified types should simply return the TypeInfo associated with that type
     assertEquals(TypeInfoFactory.stringTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory(
         (PrimitiveTypeInfo) varchar10, (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo,
diff --git ql/src/test/queries/clientnegative/invalid_char_length_1.q ql/src/test/queries/clientnegative/invalid_char_length_1.q
new file mode 100644
index 0000000..ba7d164
--- /dev/null
+++ ql/src/test/queries/clientnegative/invalid_char_length_1.q
@@ -0,0 +1,2 @@
+drop table invalid_char_length_1;
+create table invalid_char_length_1 (c1 char(1000000));
diff --git ql/src/test/queries/clientnegative/invalid_char_length_2.q ql/src/test/queries/clientnegative/invalid_char_length_2.q
new file mode 100644
index 0000000..866b43d
--- /dev/null
+++ ql/src/test/queries/clientnegative/invalid_char_length_2.q
@@ -0,0 +1 @@
+select cast(value as char(100000)) from src limit 1;
diff --git ql/src/test/queries/clientnegative/invalid_char_length_3.q ql/src/test/queries/clientnegative/invalid_char_length_3.q
new file mode 100644
index 0000000..481b630
--- /dev/null
+++ ql/src/test/queries/clientnegative/invalid_char_length_3.q
@@ -0,0 +1,3 @@
+drop table invalid_char_length_3;
+create table invalid_char_length_3 (c1 char(0));
+
diff --git ql/src/test/queries/clientpositive/alter_char1.q ql/src/test/queries/clientpositive/alter_char1.q
new file mode
100644 index 0000000..4ecb7e7 --- /dev/null +++ ql/src/test/queries/clientpositive/alter_char1.q @@ -0,0 +1,32 @@ +drop table alter_char_1; + +create table alter_char_1 (key string, value string); +insert overwrite table alter_char_1 + select key, value from src order by key limit 5; + +select * from alter_char_1 order by key; + +-- change column to char +alter table alter_char_1 change column value value char(20); +-- contents should still look the same +select * from alter_char_1 order by key; + +-- change column to smaller char +alter table alter_char_1 change column value value char(3); +-- value column should be truncated now +select * from alter_char_1 order by key; + +-- change back to bigger char +alter table alter_char_1 change column value value char(20); +-- column values should be full size again +select * from alter_char_1 order by key; + +-- add char column +alter table alter_char_1 add columns (key2 int, value2 char(10)); +select * from alter_char_1 order by key; + +insert overwrite table alter_char_1 + select key, value, key, value from src order by key limit 5; +select * from alter_char_1 order by key; + +drop table alter_char_1; diff --git ql/src/test/queries/clientpositive/alter_char2.q ql/src/test/queries/clientpositive/alter_char2.q new file mode 100644 index 0000000..7fa9fce --- /dev/null +++ ql/src/test/queries/clientpositive/alter_char2.q @@ -0,0 +1,22 @@ + +-- alter column type, with partitioned table +drop table if exists alter_char2; + +create table alter_char2 ( + c1 char(255) +) partitioned by (hr int); + +insert overwrite table alter_char2 partition (hr=1) + select value from src limit 1; + +select c1, length(c1) from alter_char2; + +alter table alter_char2 change column c1 c1 char(10); + +select hr, c1, length(c1) from alter_char2 where hr = 1; + +insert overwrite table alter_char2 partition (hr=2) + select key from src limit 1; + +select hr, c1, length(c1) from alter_char2 where hr = 1; +select hr, c1, length(c1) from alter_char2 where hr = 2; diff --git ql/src/test/queries/clientpositive/char_1.q ql/src/test/queries/clientpositive/char_1.q new file mode 100644 index 0000000..ea19671 --- /dev/null +++ ql/src/test/queries/clientpositive/char_1.q @@ -0,0 +1,32 @@ +drop table char1; +drop table char1_1; + +create table char1 (key char(10), value char(20)); +create table char1_1 (key string, value string); + +-- load from file +load data local inpath '../data/files/srcbucket0.txt' overwrite into table char1; +select * from char1 order by key, value limit 2; + +-- insert overwrite, from same/different length char +insert overwrite table char1 + select cast(key as char(10)), cast(value as char(15)) from src order by key, value limit 2; +select key, value from char1 order by key, value; + +-- insert overwrite, from string +insert overwrite table char1 + select key, value from src order by key, value limit 2; +select key, value from char1 order by key, value; + +-- insert string from char +insert overwrite table char1_1 + select key, value from char1 order by key, value limit 2; +select key, value from char1_1 order by key, value; + +-- respect string length +insert overwrite table char1 + select key, cast(value as char(3)) from src order by key, value limit 2; +select key, value from char1 order by key, value; + +drop table char1; +drop table char1_1; diff --git ql/src/test/queries/clientpositive/char_2.q ql/src/test/queries/clientpositive/char_2.q new file mode 100644 index 0000000..3e4900c --- /dev/null +++ ql/src/test/queries/clientpositive/char_2.q @@ -0,0 
+1,36 @@ +drop table char_2; + +create table char_2 ( + key char(10), + value char(20) +); + +insert overwrite table char_2 select * from src; + +select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5; + +-- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5; + +select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5; + +-- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5; + +drop table char_2; diff --git ql/src/test/queries/clientpositive/char_cast.q ql/src/test/queries/clientpositive/char_cast.q new file mode 100644 index 0000000..7f44d4d --- /dev/null +++ ql/src/test/queries/clientpositive/char_cast.q @@ -0,0 +1,92 @@ + +-- Cast from char to other data types +select + cast(cast('11' as string) as tinyint), + cast(cast('11' as string) as smallint), + cast(cast('11' as string) as int), + cast(cast('11' as string) as bigint), + cast(cast('11.00' as string) as float), + cast(cast('11.00' as string) as double), + cast(cast('11.00' as string) as decimal) +from src limit 1; + +select + cast(cast('11' as char(10)) as tinyint), + cast(cast('11' as char(10)) as smallint), + cast(cast('11' as char(10)) as int), + cast(cast('11' as char(10)) as bigint), + cast(cast('11.00' as char(10)) as float), + cast(cast('11.00' as char(10)) as double), + cast(cast('11.00' as char(10)) as decimal) +from src limit 1; + +select + cast(cast('2011-01-01' as string) as date), + cast(cast('2011-01-01 01:02:03' as string) as timestamp) +from src limit 1; + +select + cast(cast('2011-01-01' as char(10)) as date), + cast(cast('2011-01-01 01:02:03' as char(30)) as timestamp) +from src limit 1; + +-- no tests from string/char to boolean, that conversion doesn't look useful +select + cast(cast('abc123' as string) as string), + cast(cast('abc123' as string) as varchar(10)), + cast(cast('abc123' as string) as char(10)) +from src limit 1; + +select + cast(cast('abc123' as char(10)) as string), + cast(cast('abc123' as char(10)) as varchar(10)), + cast(cast('abc123' as char(10)) as char(10)) +from src limit 1; + +select + cast(cast('abc123' as varchar(10)) as string), + cast(cast('abc123' as varchar(10)) as varchar(10)), + cast(cast('abc123' as varchar(10)) as char(10)) +from src limit 1; + +-- cast from other types to char +select + cast(cast(11 as tinyint) as string), + cast(cast(11 as smallint) as string), + cast(cast(11 as int) as string), + cast(cast(11 as bigint) as string), + cast(cast(11.00 as float) as string), + cast(cast(11.00 as double) as string), + cast(cast(11.00 as decimal) as string) +from src limit 1; + +select + cast(cast(11 as tinyint) as char(10)), + cast(cast(11 as smallint) as char(10)), + cast(cast(11 as int) as char(10)), + cast(cast(11 as bigint) as char(10)), + cast(cast(11.00 as float) as char(10)), + cast(cast(11.00 as double) as char(10)), + cast(cast(11.00 as decimal) as char(10)) +from src limit 1; + +select + cast(date '2011-01-01' as string), + cast(timestamp('2011-01-01 01:02:03') as string) +from src limit 1; + +select + cast(date '2011-01-01' as char(10)), + cast(timestamp('2011-01-01 01:02:03') as char(30)) +from src limit 1; + +select + cast(true as string), + cast(false as string) +from src limit 1; + +select + cast(true as char(10)), + cast(false as char(10)) +from src limit 1; + diff --git 
ql/src/test/queries/clientpositive/char_comparison.q ql/src/test/queries/clientpositive/char_comparison.q
new file mode 100644
index 0000000..e1cfdb2
--- /dev/null
+++ ql/src/test/queries/clientpositive/char_comparison.q
@@ -0,0 +1,40 @@
+
+-- Should all be true
+select
+  cast('abc' as char(10)) = cast('abc' as char(10)),
+  cast('abc' as char(10)) <= cast('abc' as char(10)),
+  cast('abc' as char(10)) >= cast('abc' as char(10)),
+  cast('abc' as char(10)) < cast('abd' as char(10)),
+  cast('abc' as char(10)) > cast('abb' as char(10)),
+  cast('abc' as char(10)) <> cast('abb' as char(10))
+from src limit 1;
+
+-- Different char lengths should still compare the same
+select
+  cast('abc' as char(10)) = cast('abc' as char(3)),
+  cast('abc' as char(10)) <= cast('abc' as char(3)),
+  cast('abc' as char(10)) >= cast('abc' as char(3)),
+  cast('abc' as char(10)) < cast('abd' as char(3)),
+  cast('abc' as char(10)) > cast('abb' as char(3)),
+  cast('abc' as char(10)) <> cast('abb' as char(3))
+from src limit 1;
+
+-- Should work with string types as well
+select
+  cast('abc' as char(10)) = 'abc',
+  cast('abc' as char(10)) <= 'abc',
+  cast('abc' as char(10)) >= 'abc',
+  cast('abc' as char(10)) < 'abd',
+  cast('abc' as char(10)) > 'abb',
+  cast('abc' as char(10)) <> 'abb'
+from src limit 1;
+
+-- leading space is significant for char
+select
+  cast(' abc' as char(10)) <> cast('abc' as char(10))
+from src limit 1;
+
+-- trailing space is not significant for char
+select
+  cast('abc ' as char(10)) = cast('abc' as char(10))
+from src limit 1;
diff --git ql/src/test/queries/clientpositive/char_join1.q ql/src/test/queries/clientpositive/char_join1.q
new file mode 100644
index 0000000..bb61a76
--- /dev/null
+++ ql/src/test/queries/clientpositive/char_join1.q
@@ -0,0 +1,35 @@
+drop table char_join1_ch1;
+drop table char_join1_ch2;
+drop table char_join1_str;
+
+create table char_join1_ch1 (
+  c1 int,
+  c2 char(10)
+);
+
+create table char_join1_ch2 (
+  c1 int,
+  c2 char(20)
+);
+
+create table char_join1_str (
+  c1 int,
+  c2 string
+);
+
+load data local inpath '../data/files/vc1.txt' into table char_join1_ch1;
+load data local inpath '../data/files/vc1.txt' into table char_join1_ch2;
+load data local inpath '../data/files/vc1.txt' into table char_join1_str;
+
+-- Join char with same length char
+select * from char_join1_ch1 a join char_join1_ch1 b on (a.c2 = b.c2) order by a.c1;
+
+-- Join char with different length char
+select * from char_join1_ch1 a join char_join1_ch2 b on (a.c2 = b.c2) order by a.c1;
+
+-- Join char with string
+select * from char_join1_ch1 a join char_join1_str b on (a.c2 = b.c2) order by a.c1;
+
+drop table char_join1_ch1;
+drop table char_join1_ch2;
+drop table char_join1_str;
diff --git ql/src/test/queries/clientpositive/char_nested_types.q ql/src/test/queries/clientpositive/char_nested_types.q
new file mode 100644
index 0000000..c710b6c
--- /dev/null
+++ ql/src/test/queries/clientpositive/char_nested_types.q
@@ -0,0 +1,53 @@
+drop table char_nested_1;
+drop table char_nested_array;
+drop table char_nested_map;
+drop table char_nested_struct;
+drop table char_nested_cta;
+drop view char_nested_view;
+
+create table char_nested_1 (key int, value char(20));
+insert overwrite table char_nested_1
+  select key, value from src order by key limit 1;
+
+-- arrays
+create table char_nested_array (c1 array<char(20)>);
+insert overwrite table char_nested_array
+  select array(value, value) from char_nested_1;
+describe char_nested_array;
+select * from char_nested_array;
+
+-- maps
+create table
char_nested_map (c1 map<int, char(20)>);
+insert overwrite table char_nested_map
+  select map(key, value) from char_nested_1;
+describe char_nested_map;
+select * from char_nested_map;
+
+-- structs
+create table char_nested_struct (c1 struct<a:int, b:char(20), c:string>);
+insert overwrite table char_nested_struct
+  select named_struct('a', key,
+                      'b', value,
+                      'c', cast(value as string))
+  from char_nested_1;
+describe char_nested_struct;
+select * from char_nested_struct;
+
+-- nested type with create table as
+create table char_nested_cta as
+  select * from char_nested_struct;
+describe char_nested_cta;
+select * from char_nested_cta;
+
+-- nested type with view
+create view char_nested_view as
+  select * from char_nested_struct;
+describe char_nested_view;
+select * from char_nested_view;
+
+drop table char_nested_1;
+drop table char_nested_array;
+drop table char_nested_map;
+drop table char_nested_struct;
+drop table char_nested_cta;
+drop view char_nested_view;
diff --git ql/src/test/queries/clientpositive/char_udf1.q ql/src/test/queries/clientpositive/char_udf1.q
new file mode 100644
index 0000000..629d41d
--- /dev/null
+++ ql/src/test/queries/clientpositive/char_udf1.q
@@ -0,0 +1,156 @@
+drop table char_udf_1;
+
+create table char_udf_1 (c1 string, c2 string, c3 char(10), c4 char(20));
+insert overwrite table char_udf_1
+  select key, value, key, value from src where key = '238' limit 1;
+
+-- UDFs with char support
+select
+  concat(c1, c2),
+  concat(c3, c4),
+  concat(c1, c2) = concat(c3, c4)
+from char_udf_1 limit 1;
+
+select
+  upper(c2),
+  upper(c4),
+  upper(c2) = upper(c4)
+from char_udf_1 limit 1;
+
+select
+  lower(c2),
+  lower(c4),
+  lower(c2) = lower(c4)
+from char_udf_1 limit 1;
+
+-- Scalar UDFs
+select
+  ascii(c2),
+  ascii(c4),
+  ascii(c2) = ascii(c4)
+from char_udf_1 limit 1;
+
+select
+  concat_ws('|', c1, c2),
+  concat_ws('|', c3, c4),
+  concat_ws('|', c1, c2) = concat_ws('|', c3, c4)
+from char_udf_1 limit 1;
+
+select
+  decode(encode(c2, 'US-ASCII'), 'US-ASCII'),
+  decode(encode(c4, 'US-ASCII'), 'US-ASCII'),
+  decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII')
+from char_udf_1 limit 1;
+
+select
+  instr(c2, '_'),
+  instr(c4, '_'),
+  instr(c2, '_') = instr(c4, '_')
+from char_udf_1 limit 1;
+
+select
+  length(c2),
+  length(c4),
+  length(c2) = length(c4)
+from char_udf_1 limit 1;
+
+select
+  locate('a', 'abcdabcd', 3),
+  locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3),
+  locate('a', 'abcdabcd', 3) = locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3)
+from char_udf_1 limit 1;
+
+select
+  lpad(c2, 15, ' '),
+  lpad(c4, 15, ' '),
+  lpad(c2, 15, ' ') = lpad(c4, 15, ' ')
+from char_udf_1 limit 1;
+
+select
+  ltrim(c2),
+  ltrim(c4),
+  ltrim(c2) = ltrim(c4)
+from char_udf_1 limit 1;
+
+select
+  regexp(c2, 'val'),
+  regexp(c4, 'val'),
+  regexp(c2, 'val') = regexp(c4, 'val')
+from char_udf_1 limit 1;
+
+select
+  regexp_extract(c2, 'val_([0-9]+)', 1),
+  regexp_extract(c4, 'val_([0-9]+)', 1),
+  regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)
+from char_udf_1 limit 1;
+
+select
+  regexp_replace(c2, 'val', 'replaced'),
+  regexp_replace(c4, 'val', 'replaced'),
+  regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')
+from char_udf_1 limit 1;
+
+select
+  reverse(c2),
+  reverse(c4),
+  reverse(c2) = reverse(c4)
+from char_udf_1 limit 1;
+
+select
+  rpad(c2, 15, ' '),
+  rpad(c4, 15, ' '),
+  rpad(c2, 15, ' ') = rpad(c4, 15, ' ')
+from char_udf_1 limit 1;
+
+select
+  rtrim(c2),
+  rtrim(c4),
+  rtrim(c2) = rtrim(c4)
+from char_udf_1 limit 1; + +select + sentences('See spot run. See jane run.'), + sentences(cast('See spot run. See jane run.' as char(50))) +from char_udf_1 limit 1; + +select + split(c2, '_'), + split(c4, '_') +from char_udf_1 limit 1; + +select + str_to_map('a:1,b:2,c:3',',',':'), + str_to_map(cast('a:1,b:2,c:3' as char(20)),',',':') +from char_udf_1 limit 1; + +select + substr(c2, 1, 3), + substr(c4, 1, 3), + substr(c2, 1, 3) = substr(c4, 1, 3) +from char_udf_1 limit 1; + +select + trim(c2), + trim(c4), + trim(c2) = trim(c4) +from char_udf_1 limit 1; + + +-- Aggregate Functions +select + compute_stats(c2, 16), + compute_stats(c4, 16) +from char_udf_1; + +select + min(c2), + min(c4) +from char_udf_1; + +select + max(c2), + max(c4) +from char_udf_1; + + +drop table char_udf_1; diff --git ql/src/test/queries/clientpositive/char_union1.q ql/src/test/queries/clientpositive/char_union1.q new file mode 100644 index 0000000..1ba3568 --- /dev/null +++ ql/src/test/queries/clientpositive/char_union1.q @@ -0,0 +1,47 @@ +drop table char_union1_ch1; +drop table char_union1_ch2; +drop table char_union1_str; + +create table char_union1_ch1 ( + c1 int, + c2 char(10) +); + +create table char_union1_ch2 ( + c1 int, + c2 char(20) +); + +create table char_union1_str ( + c1 int, + c2 string +); + +load data local inpath '../data/files/vc1.txt' into table char_union1_ch1; +load data local inpath '../data/files/vc1.txt' into table char_union1_ch2; +load data local inpath '../data/files/vc1.txt' into table char_union1_str; + +-- union char with same length char +select * from ( + select * from char_union1_ch1 + union all + select * from char_union1_ch1 limit 1 +) q1 sort by c1; + +-- union char with different length char +select * from ( + select * from char_union1_ch1 + union all + select * from char_union1_ch2 limit 1 +) q1 sort by c1; + +-- union char with string +select * from ( + select * from char_union1_ch1 + union all + select * from char_union1_str limit 1 +) q1 sort by c1; + +drop table char_union1_ch1; +drop table char_union1_ch2; +drop table char_union1_str; diff --git ql/src/test/queries/clientpositive/ctas_char.q ql/src/test/queries/clientpositive/ctas_char.q new file mode 100644 index 0000000..ecfe74a --- /dev/null +++ ql/src/test/queries/clientpositive/ctas_char.q @@ -0,0 +1,22 @@ +drop table ctas_char_1; +drop table ctas_char_2; +drop view ctas_char_3; + +create table ctas_char_1 (key char(10), value string); +insert overwrite table ctas_char_1 + select key, value from src sort by key, value limit 5; + +-- create table as with char column +create table ctas_char_2 as select key, value from ctas_char_1; + +-- view with char column +create view ctas_char_3 as select key, value from ctas_char_2; + +select key, value from ctas_char_1; +select * from ctas_char_2; +select * from ctas_char_3; + + +drop table ctas_char_1; +drop table ctas_char_2; +drop view ctas_char_3; diff --git ql/src/test/results/clientnegative/invalid_char_length_1.q.out ql/src/test/results/clientnegative/invalid_char_length_1.q.out new file mode 100644 index 0000000..b74a96b --- /dev/null +++ ql/src/test/results/clientnegative/invalid_char_length_1.q.out @@ -0,0 +1,5 @@ +PREHOOK: query: drop table invalid_char_length_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table invalid_char_length_1 +POSTHOOK: type: DROPTABLE +FAILED: RuntimeException Char length 1000000 out of allowed range [1, 255] diff --git ql/src/test/results/clientnegative/invalid_char_length_2.q.out 
ql/src/test/results/clientnegative/invalid_char_length_2.q.out new file mode 100644 index 0000000..343a2f3 --- /dev/null +++ ql/src/test/results/clientnegative/invalid_char_length_2.q.out @@ -0,0 +1 @@ +FAILED: RuntimeException Char length 100000 out of allowed range [1, 255] diff --git ql/src/test/results/clientnegative/invalid_char_length_3.q.out ql/src/test/results/clientnegative/invalid_char_length_3.q.out new file mode 100644 index 0000000..88f3752 --- /dev/null +++ ql/src/test/results/clientnegative/invalid_char_length_3.q.out @@ -0,0 +1,5 @@ +PREHOOK: query: drop table invalid_char_length_3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table invalid_char_length_3 +POSTHOOK: type: DROPTABLE +FAILED: RuntimeException Char length 0 out of allowed range [1, 255] diff --git ql/src/test/results/clientpositive/alter_char1.q.out ql/src/test/results/clientpositive/alter_char1.q.out new file mode 100644 index 0000000..1491ed8 --- /dev/null +++ ql/src/test/results/clientpositive/alter_char1.q.out @@ -0,0 +1,199 @@ +PREHOOK: query: drop table alter_char_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table alter_char_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table alter_char_1 (key string, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table alter_char_1 (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@alter_char_1 +PREHOOK: query: insert overwrite table alter_char_1 + select key, value from src order by key limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@alter_char_1 +POSTHOOK: query: insert overwrite table alter_char_1 + select key, value from src order by key limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@alter_char_1 +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from alter_char_1 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_char_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_char_1 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_char_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: -- change column to char +alter table alter_char_1 change column value value char(20) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alter_char_1 +PREHOOK: Output: default@alter_char_1 +POSTHOOK: query: -- change column to char +alter table alter_char_1 change column value value char(20) +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alter_char_1 +POSTHOOK: Output: default@alter_char_1 +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- contents should still look the same +select * from alter_char_1 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_char_1 +#### A masked pattern was here #### +POSTHOOK: query: -- contents should still look the same +select * from alter_char_1 order by key 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_char_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: -- change column to smaller char +alter table alter_char_1 change column value value char(3) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alter_char_1 +PREHOOK: Output: default@alter_char_1 +POSTHOOK: query: -- change column to smaller char +alter table alter_char_1 change column value value char(3) +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alter_char_1 +POSTHOOK: Output: default@alter_char_1 +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- value column should be truncated now +select * from alter_char_1 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_char_1 +#### A masked pattern was here #### +POSTHOOK: query: -- value column should be truncated now +select * from alter_char_1 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_char_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val +0 val +0 val +10 val +100 val +PREHOOK: query: -- change back to bigger char +alter table alter_char_1 change column value value char(20) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alter_char_1 +PREHOOK: Output: default@alter_char_1 +POSTHOOK: query: -- change back to bigger char +alter table alter_char_1 change column value value char(20) +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alter_char_1 +POSTHOOK: Output: default@alter_char_1 +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- column values should be full size again +select * from alter_char_1 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_char_1 +#### A masked pattern was here #### +POSTHOOK: query: -- column values should be full size again +select * from alter_char_1 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_char_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: -- add char column +alter table alter_char_1 add columns (key2 int, value2 char(10)) +PREHOOK: type: ALTERTABLE_ADDCOLS +PREHOOK: Input: default@alter_char_1 +PREHOOK: Output: default@alter_char_1 +POSTHOOK: query: -- add char column +alter table alter_char_1 add columns (key2 int, value2 char(10)) +POSTHOOK: type: ALTERTABLE_ADDCOLS +POSTHOOK: Input: default@alter_char_1 +POSTHOOK: Output: default@alter_char_1 +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from alter_char_1 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_char_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_char_1 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_char_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 NULL NULL +0 val_0 NULL NULL +0 val_0 NULL NULL +10 val_10 NULL NULL +100 val_100 NULL NULL +PREHOOK: query: insert overwrite table alter_char_1 + select key, value, key, value from src order by key limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@alter_char_1 +POSTHOOK: query: insert overwrite table alter_char_1 + select key, value, key, value from src order by key limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@alter_char_1 +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from alter_char_1 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_char_1 +#### A masked pattern was here #### +POSTHOOK: query: select * from alter_char_1 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_char_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +10 val_10 10 val_10 +100 val_100 100 val_100 +PREHOOK: query: drop table alter_char_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@alter_char_1 +PREHOOK: Output: default@alter_char_1 +POSTHOOK: query: drop table alter_char_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@alter_char_1 +POSTHOOK: Output: default@alter_char_1 +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.key2 EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char_1.value2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/alter_char2.q.out ql/src/test/results/clientpositive/alter_char2.q.out new file mode 100644 index 0000000..28e9368 --- /dev/null +++ ql/src/test/results/clientpositive/alter_char2.q.out @@ -0,0 +1,97 @@ +PREHOOK: query: -- alter column type, with partitioned table +drop table if exists alter_char2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- alter column type, with partitioned table +drop table if exists alter_char2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table alter_char2 ( + c1 char(255) +) partitioned by (hr int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table alter_char2 ( + c1 char(255) +) partitioned by (hr int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@alter_char2 +PREHOOK: query: insert overwrite table alter_char2 partition (hr=1) + select value from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@alter_char2@hr=1 +POSTHOOK: query: insert overwrite table alter_char2 partition (hr=1) + select value from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@alter_char2@hr=1 +POSTHOOK: Lineage: alter_char2 PARTITION(hr=1).c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select c1, length(c1) from alter_char2 +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_char2 +PREHOOK: Input: default@alter_char2@hr=1 +#### A masked pattern was here #### +POSTHOOK: query: select c1, length(c1) from alter_char2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_char2 +POSTHOOK: Input: default@alter_char2@hr=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_char2 PARTITION(hr=1).c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 7 +PREHOOK: query: alter table alter_char2 change column c1 c1 char(10) +PREHOOK: type: ALTERTABLE_RENAMECOL +PREHOOK: Input: default@alter_char2 +PREHOOK: Output: default@alter_char2 +POSTHOOK: query: alter table alter_char2 change column c1 c1 char(10) +POSTHOOK: type: ALTERTABLE_RENAMECOL +POSTHOOK: Input: default@alter_char2 +POSTHOOK: Output: default@alter_char2 +POSTHOOK: Lineage: alter_char2 PARTITION(hr=1).c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select hr, c1, length(c1) from alter_char2 where hr = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_char2 +PREHOOK: Input: default@alter_char2@hr=1 +#### A masked pattern was here #### +POSTHOOK: query: select hr, c1, length(c1) from alter_char2 where hr = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_char2 +POSTHOOK: Input: default@alter_char2@hr=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_char2 PARTITION(hr=1).c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +1 val_238 7 +PREHOOK: query: insert overwrite table alter_char2 partition (hr=2) + select key from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@alter_char2@hr=2 +POSTHOOK: query: insert overwrite table alter_char2 partition (hr=2) + select key from src 
limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@alter_char2@hr=2 +POSTHOOK: Lineage: alter_char2 PARTITION(hr=1).c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char2 PARTITION(hr=2).c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select hr, c1, length(c1) from alter_char2 where hr = 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_char2 +PREHOOK: Input: default@alter_char2@hr=1 +#### A masked pattern was here #### +POSTHOOK: query: select hr, c1, length(c1) from alter_char2 where hr = 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_char2 +POSTHOOK: Input: default@alter_char2@hr=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_char2 PARTITION(hr=1).c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char2 PARTITION(hr=2).c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +1 val_238 7 +PREHOOK: query: select hr, c1, length(c1) from alter_char2 where hr = 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@alter_char2 +PREHOOK: Input: default@alter_char2@hr=2 +#### A masked pattern was here #### +POSTHOOK: query: select hr, c1, length(c1) from alter_char2 where hr = 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alter_char2 +POSTHOOK: Input: default@alter_char2@hr=2 +#### A masked pattern was here #### +POSTHOOK: Lineage: alter_char2 PARTITION(hr=1).c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: alter_char2 PARTITION(hr=2).c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +2 238 3 diff --git ql/src/test/results/clientpositive/char_1.q.out ql/src/test/results/clientpositive/char_1.q.out new file mode 100644 index 0000000..a980b13 --- /dev/null +++ ql/src/test/results/clientpositive/char_1.q.out @@ -0,0 +1,196 @@ +PREHOOK: query: drop table char1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table char1_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char1_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char1 (key char(10), value char(20)) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table char1 (key char(10), value char(20)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char1 +PREHOOK: query: create table char1_1 (key string, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table char1_1 (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char1_1 +PREHOOK: query: -- load from file +load data local inpath '../data/files/srcbucket0.txt' overwrite into table char1 +PREHOOK: type: LOAD +PREHOOK: Output: default@char1 +POSTHOOK: query: -- load from file +load data local inpath '../data/files/srcbucket0.txt' overwrite into table char1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@char1 +PREHOOK: query: select * from char1 order by key, value limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@char1 +#### A masked pattern was here #### +POSTHOOK: query: select * from char1 order by key, value limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char1 +#### A masked pattern was here #### +0 val_0 +0 val_0 +PREHOOK: query: -- insert overwrite, from same/different length char +insert overwrite table char1 + select cast(key as char(10)), cast(value as char(15)) from src order by key, value limit 2 +PREHOOK: 
type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char1 +POSTHOOK: query: -- insert overwrite, from same/different length char +insert overwrite table char1 + select cast(key as char(10)), cast(value as char(15)) from src order by key, value limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char1 +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, value from char1 order by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@char1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from char1 order by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 +0 val_0 +PREHOOK: query: -- insert overwrite, from string +insert overwrite table char1 + select key, value from src order by key, value limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char1 +POSTHOOK: query: -- insert overwrite, from string +insert overwrite table char1 + select key, value from src order by key, value limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char1 +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, value from char1 order by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@char1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from char1 order by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 +0 val_0 +PREHOOK: query: -- insert string from char +insert overwrite table char1_1 + select key, value from char1 order by key, value limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@char1 +PREHOOK: Output: default@char1_1 +POSTHOOK: query: -- insert string from char +insert overwrite table char1_1 + select key, value from char1 order by key, value limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char1 +POSTHOOK: Output: default@char1_1 +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: 
char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1_1.key SIMPLE [(char1)char1.FieldSchema(name:key, type:char(10), comment:null), ] +POSTHOOK: Lineage: char1_1.value SIMPLE [(char1)char1.FieldSchema(name:value, type:char(20), comment:null), ] +PREHOOK: query: select key, value from char1_1 order by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@char1_1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from char1_1 order by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char1_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1_1.key SIMPLE [(char1)char1.FieldSchema(name:key, type:char(10), comment:null), ] +POSTHOOK: Lineage: char1_1.value SIMPLE [(char1)char1.FieldSchema(name:value, type:char(20), comment:null), ] +0 val_0 +0 val_0 +PREHOOK: query: -- respect string length +insert overwrite table char1 + select key, cast(value as char(3)) from src order by key, value limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char1 +POSTHOOK: query: -- respect string length +insert overwrite table char1 + select key, cast(value as char(3)) from src order by key, value limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char1 +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1_1.key SIMPLE [(char1)char1.FieldSchema(name:key, type:char(10), comment:null), ] +POSTHOOK: Lineage: char1_1.value SIMPLE [(char1)char1.FieldSchema(name:value, type:char(20), comment:null), ] +PREHOOK: query: select key, value from char1 order by key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@char1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from char1 order by key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1_1.key SIMPLE [(char1)char1.FieldSchema(name:key, type:char(10), comment:null), ] +POSTHOOK: Lineage: char1_1.value SIMPLE [(char1)char1.FieldSchema(name:value, type:char(20), comment:null), ] +0 val +0 val +PREHOOK: query: drop table char1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char1 +PREHOOK: Output: default@char1 +POSTHOOK: query: drop table char1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char1 +POSTHOOK: Output: default@char1 +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1_1.key SIMPLE [(char1)char1.FieldSchema(name:key, type:char(10), comment:null), ] +POSTHOOK: Lineage: char1_1.value SIMPLE [(char1)char1.FieldSchema(name:value, type:char(20), comment:null), ] +PREHOOK: query: drop table char1_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char1_1 +PREHOOK: Output: default@char1_1 +POSTHOOK: query: drop table char1_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char1_1 +POSTHOOK: Output: default@char1_1 +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char1_1.key SIMPLE [(char1)char1.FieldSchema(name:key, type:char(10), comment:null), ] +POSTHOOK: Lineage: char1_1.value SIMPLE [(char1)char1.FieldSchema(name:value, type:char(20), comment:null), ] diff --git ql/src/test/results/clientpositive/char_2.q.out ql/src/test/results/clientpositive/char_2.q.out new file mode 100644 index 0000000..e6b8eab --- /dev/null +++ ql/src/test/results/clientpositive/char_2.q.out @@ -0,0 +1,131 @@ +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_2 ( + key char(10), + value char(20) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table char_2 ( + key char(10), + value char(20) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_2 +PREHOOK: query: insert overwrite table char_2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char_2 +POSTHOOK: query: insert overwrite table char_2 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char_2 +POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] 
+POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_0 0 3 +val_10 10 1 +val_100 200 2 +val_103 206 2 +val_104 208 2 +PREHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_0 0 3 +val_10 10 1 +val_100 200 2 +val_103 206 2 +val_104 208 2 +PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from char_2 +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: drop table char_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_2 +PREHOOK: Output: default@char_2 +POSTHOOK: query: drop table char_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_2 +POSTHOOK: Output: default@char_2 +POSTHOOK: Lineage: char_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_2.value 
EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/char_cast.q.out ql/src/test/results/clientpositive/char_cast.q.out new file mode 100644 index 0000000..025fedb --- /dev/null +++ ql/src/test/results/clientpositive/char_cast.q.out @@ -0,0 +1,247 @@ +PREHOOK: query: -- Cast from char to other data types +select + cast(cast('11' as string) as tinyint), + cast(cast('11' as string) as smallint), + cast(cast('11' as string) as int), + cast(cast('11' as string) as bigint), + cast(cast('11.00' as string) as float), + cast(cast('11.00' as string) as double), + cast(cast('11.00' as string) as decimal) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Cast from char to other data types +select + cast(cast('11' as string) as tinyint), + cast(cast('11' as string) as smallint), + cast(cast('11' as string) as int), + cast(cast('11' as string) as bigint), + cast(cast('11.00' as string) as float), + cast(cast('11.00' as string) as double), + cast(cast('11.00' as string) as decimal) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +11 11 11 11 11.0 11.0 11 +PREHOOK: query: select + cast(cast('11' as char(10)) as tinyint), + cast(cast('11' as char(10)) as smallint), + cast(cast('11' as char(10)) as int), + cast(cast('11' as char(10)) as bigint), + cast(cast('11.00' as char(10)) as float), + cast(cast('11.00' as char(10)) as double), + cast(cast('11.00' as char(10)) as decimal) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(cast('11' as char(10)) as tinyint), + cast(cast('11' as char(10)) as smallint), + cast(cast('11' as char(10)) as int), + cast(cast('11' as char(10)) as bigint), + cast(cast('11.00' as char(10)) as float), + cast(cast('11.00' as char(10)) as double), + cast(cast('11.00' as char(10)) as decimal) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +11 11 11 11 11.0 11.0 11 +PREHOOK: query: select + cast(cast('2011-01-01' as string) as date), + cast(cast('2011-01-01 01:02:03' as string) as timestamp) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(cast('2011-01-01' as string) as date), + cast(cast('2011-01-01 01:02:03' as string) as timestamp) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2011-01-01 2011-01-01 01:02:03 +PREHOOK: query: select + cast(cast('2011-01-01' as char(10)) as date), + cast(cast('2011-01-01 01:02:03' as char(30)) as timestamp) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(cast('2011-01-01' as char(10)) as date), + cast(cast('2011-01-01 01:02:03' as char(30)) as timestamp) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2011-01-01 2011-01-01 01:02:03 +PREHOOK: query: -- no tests from string/char to boolean, that conversion doesn't look useful +select + cast(cast('abc123' as string) as string), + cast(cast('abc123' as string) as varchar(10)), + cast(cast('abc123' as string) as char(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- no tests 
from string/char to boolean, that conversion doesn't look useful +select + cast(cast('abc123' as string) as string), + cast(cast('abc123' as string) as varchar(10)), + cast(cast('abc123' as string) as char(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +abc123 abc123 abc123 +PREHOOK: query: select + cast(cast('abc123' as char(10)) as string), + cast(cast('abc123' as char(10)) as varchar(10)), + cast(cast('abc123' as char(10)) as char(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(cast('abc123' as char(10)) as string), + cast(cast('abc123' as char(10)) as varchar(10)), + cast(cast('abc123' as char(10)) as char(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +abc123 abc123 abc123 +PREHOOK: query: select + cast(cast('abc123' as varchar(10)) as string), + cast(cast('abc123' as varchar(10)) as varchar(10)), + cast(cast('abc123' as varchar(10)) as char(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(cast('abc123' as varchar(10)) as string), + cast(cast('abc123' as varchar(10)) as varchar(10)), + cast(cast('abc123' as varchar(10)) as char(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +abc123 abc123 abc123 +PREHOOK: query: -- cast from other types to char +select + cast(cast(11 as tinyint) as string), + cast(cast(11 as smallint) as string), + cast(cast(11 as int) as string), + cast(cast(11 as bigint) as string), + cast(cast(11.00 as float) as string), + cast(cast(11.00 as double) as string), + cast(cast(11.00 as decimal) as string) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- cast from other types to char +select + cast(cast(11 as tinyint) as string), + cast(cast(11 as smallint) as string), + cast(cast(11 as int) as string), + cast(cast(11 as bigint) as string), + cast(cast(11.00 as float) as string), + cast(cast(11.00 as double) as string), + cast(cast(11.00 as decimal) as string) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +11 11 11 11 11.0 11.0 11 +PREHOOK: query: select + cast(cast(11 as tinyint) as char(10)), + cast(cast(11 as smallint) as char(10)), + cast(cast(11 as int) as char(10)), + cast(cast(11 as bigint) as char(10)), + cast(cast(11.00 as float) as char(10)), + cast(cast(11.00 as double) as char(10)), + cast(cast(11.00 as decimal) as char(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(cast(11 as tinyint) as char(10)), + cast(cast(11 as smallint) as char(10)), + cast(cast(11 as int) as char(10)), + cast(cast(11 as bigint) as char(10)), + cast(cast(11.00 as float) as char(10)), + cast(cast(11.00 as double) as char(10)), + cast(cast(11.00 as decimal) as char(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +11 11 11 11 11.0 11.0 11 +PREHOOK: query: select + cast(date '2011-01-01' as string), + cast(timestamp('2011-01-01 01:02:03') as string) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(date '2011-01-01' as string), 
+ cast(timestamp('2011-01-01 01:02:03') as string) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2011-01-01 2011-01-01 01:02:03 +PREHOOK: query: select + cast(date '2011-01-01' as char(10)), + cast(timestamp('2011-01-01 01:02:03') as char(30)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(date '2011-01-01' as char(10)), + cast(timestamp('2011-01-01 01:02:03') as char(30)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +2011-01-01 2011-01-01 01:02:03 +PREHOOK: query: select + cast(true as string), + cast(false as string) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(true as string), + cast(false as string) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +TRUE FALSE +PREHOOK: query: select + cast(true as char(10)), + cast(false as char(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select + cast(true as char(10)), + cast(false as char(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +TRUE FALSE diff --git ql/src/test/results/clientpositive/char_comparison.q.out ql/src/test/results/clientpositive/char_comparison.q.out new file mode 100644 index 0000000..9493c99 --- /dev/null +++ ql/src/test/results/clientpositive/char_comparison.q.out @@ -0,0 +1,105 @@ +PREHOOK: query: -- Should all be true +select + cast('abc' as char(10)) = cast('abc' as char(10)), + cast('abc' as char(10)) <= cast('abc' as char(10)), + cast('abc' as char(10)) >= cast('abc' as char(10)), + cast('abc' as char(10)) < cast('abd' as char(10)), + cast('abc' as char(10)) > cast('abb' as char(10)), + cast('abc' as char(10)) <> cast('abb' as char(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Should all be true +select + cast('abc' as char(10)) = cast('abc' as char(10)), + cast('abc' as char(10)) <= cast('abc' as char(10)), + cast('abc' as char(10)) >= cast('abc' as char(10)), + cast('abc' as char(10)) < cast('abd' as char(10)), + cast('abc' as char(10)) > cast('abb' as char(10)), + cast('abc' as char(10)) <> cast('abb' as char(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true true true true true true +PREHOOK: query: -- Different char lengths should still compare the same +select + cast('abc' as char(10)) = cast('abc' as char(3)), + cast('abc' as char(10)) <= cast('abc' as char(3)), + cast('abc' as char(10)) >= cast('abc' as char(3)), + cast('abc' as char(10)) < cast('abd' as char(3)), + cast('abc' as char(10)) > cast('abb' as char(3)), + cast('abc' as char(10)) <> cast('abb' as char(3)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Different char lengths should still compare the same +select + cast('abc' as char(10)) = cast('abc' as char(3)), + cast('abc' as char(10)) <= cast('abc' as char(3)), + cast('abc' as char(10)) >= cast('abc' as char(3)), + cast('abc' as char(10)) < cast('abd' as char(3)), + cast('abc' as char(10)) > cast('abb' as char(3)), + cast('abc' as char(10)) <> cast('abb' as 
char(3)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true true true true true true +PREHOOK: query: -- Should work with string types as well +select + cast('abc' as char(10)) = 'abc', + cast('abc' as char(10)) <= 'abc', + cast('abc' as char(10)) >= 'abc', + cast('abc' as char(10)) < 'abd', + cast('abc' as char(10)) > 'abb', + cast('abc' as char(10)) <> 'abb' +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Should work with string types as well +select + cast('abc' as char(10)) = 'abc', + cast('abc' as char(10)) <= 'abc', + cast('abc' as char(10)) >= 'abc', + cast('abc' as char(10)) < 'abd', + cast('abc' as char(10)) > 'abb', + cast('abc' as char(10)) <> 'abb' +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true true true true true true +PREHOOK: query: -- leading space is significant for char +select + cast(' abc' as char(10)) <> cast('abc' as char(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- leading space is significant for char +select + cast(' abc' as char(10)) <> cast('abc' as char(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true +PREHOOK: query: -- trailing space is not significant for char +select + cast('abc ' as char(10)) = cast('abc' as char(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- trailing space is not significant for char +select + cast('abc ' as char(10)) = cast('abc' as char(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true diff --git ql/src/test/results/clientpositive/char_join1.q.out ql/src/test/results/clientpositive/char_join1.q.out new file mode 100644 index 0000000..2208ae7 --- /dev/null +++ ql/src/test/results/clientpositive/char_join1.q.out @@ -0,0 +1,134 @@ +PREHOOK: query: drop table char_join1_ch1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_join1_ch1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table char_join1_ch2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_join1_ch2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table char_join1_str +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_join1_str +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_join1_ch1 ( + c1 int, + c2 char(10) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table char_join1_ch1 ( + c1 int, + c2 char(10) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_join1_ch1 +PREHOOK: query: create table char_join1_ch2 ( + c1 int, + c2 char(20) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table char_join1_ch2 ( + c1 int, + c2 char(20) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_join1_ch2 +PREHOOK: query: create table char_join1_str ( + c1 int, + c2 string +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table char_join1_str ( + c1 int, + c2 string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_join1_str +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table char_join1_ch1 +PREHOOK: type: LOAD +PREHOOK: Output: default@char_join1_ch1 +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table char_join1_ch1 +POSTHOOK: type: LOAD 
+POSTHOOK: Output: default@char_join1_ch1 +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table char_join1_ch2 +PREHOOK: type: LOAD +PREHOOK: Output: default@char_join1_ch2 +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table char_join1_ch2 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@char_join1_ch2 +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table char_join1_str +PREHOOK: type: LOAD +PREHOOK: Output: default@char_join1_str +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table char_join1_str +POSTHOOK: type: LOAD +POSTHOOK: Output: default@char_join1_str +PREHOOK: query: -- Join char with same length char +select * from char_join1_ch1 a join char_join1_ch1 b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_join1_ch1 +#### A masked pattern was here #### +POSTHOOK: query: -- Join char with same length char +select * from char_join1_ch1 a join char_join1_ch1 b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_join1_ch1 +#### A masked pattern was here #### +1 abc 1 abc +1 abc 2 abc +2 abc 1 abc +2 abc 2 abc +3 abc 3 abc +PREHOOK: query: -- Join char with different length char +select * from char_join1_ch1 a join char_join1_ch2 b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_join1_ch1 +PREHOOK: Input: default@char_join1_ch2 +#### A masked pattern was here #### +POSTHOOK: query: -- Join char with different length char +select * from char_join1_ch1 a join char_join1_ch2 b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_join1_ch1 +POSTHOOK: Input: default@char_join1_ch2 +#### A masked pattern was here #### +1 abc 1 abc +1 abc 2 abc +2 abc 1 abc +2 abc 2 abc +3 abc 3 abc +PREHOOK: query: -- Join char with string +select * from char_join1_ch1 a join char_join1_str b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_join1_ch1 +PREHOOK: Input: default@char_join1_str +#### A masked pattern was here #### +POSTHOOK: query: -- Join char with string +select * from char_join1_ch1 a join char_join1_str b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_join1_ch1 +POSTHOOK: Input: default@char_join1_str +#### A masked pattern was here #### +1 abc 1 abc +2 abc 1 abc +3 abc 3 abc +PREHOOK: query: drop table char_join1_ch1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_join1_ch1 +PREHOOK: Output: default@char_join1_ch1 +POSTHOOK: query: drop table char_join1_ch1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_join1_ch1 +POSTHOOK: Output: default@char_join1_ch1 +PREHOOK: query: drop table char_join1_ch2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_join1_ch2 +PREHOOK: Output: default@char_join1_ch2 +POSTHOOK: query: drop table char_join1_ch2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_join1_ch2 +POSTHOOK: Output: default@char_join1_ch2 +PREHOOK: query: drop table char_join1_str +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_join1_str +PREHOOK: Output: default@char_join1_str +POSTHOOK: query: drop table char_join1_str +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_join1_str +POSTHOOK: Output: default@char_join1_str diff --git ql/src/test/results/clientpositive/char_nested_types.q.out ql/src/test/results/clientpositive/char_nested_types.q.out new file mode 100644 index 0000000..7fff702 --- /dev/null +++ 
ql/src/test/results/clientpositive/char_nested_types.q.out @@ -0,0 +1,343 @@ +PREHOOK: query: drop table char_nested_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_nested_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table char_nested_array +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_nested_array +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table char_nested_map +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_nested_map +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table char_nested_struct +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_nested_struct +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table char_nested_cta +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_nested_cta +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table char_nested_view +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_nested_view +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_nested_1 (key int, value char(20)) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table char_nested_1 (key int, value char(20)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_nested_1 +PREHOOK: query: insert overwrite table char_nested_1 + select key, value from src order by key limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char_nested_1 +POSTHOOK: query: insert overwrite table char_nested_1 + select key, value from src order by key limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char_nested_1 +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- arrays +create table char_nested_array (c1 array) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- arrays +create table char_nested_array (c1 array) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_nested_array +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table char_nested_array + select array(value, value) from char_nested_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_nested_1 +PREHOOK: Output: default@char_nested_array +POSTHOOK: query: insert overwrite table char_nested_array + select array(value, value) from char_nested_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_nested_1 +POSTHOOK: Output: default@char_nested_array +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +PREHOOK: query: describe char_nested_array +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe char_nested_array +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, 
type:char(20), comment:null), ] +c1 array None +PREHOOK: query: select * from char_nested_array +PREHOOK: type: QUERY +PREHOOK: Input: default@char_nested_array +#### A masked pattern was here #### +POSTHOOK: query: select * from char_nested_array +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_nested_array +#### A masked pattern was here #### +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +["val_0 ","val_0 "] +PREHOOK: query: -- maps +create table char_nested_map (c1 map) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- maps +create table char_nested_map (c1 map) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_nested_map +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +PREHOOK: query: insert overwrite table char_nested_map + select map(key, value) from char_nested_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_nested_1 +PREHOOK: Output: default@char_nested_map +POSTHOOK: query: insert overwrite table char_nested_map + select map(key, value) from char_nested_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_nested_1 +POSTHOOK: Output: default@char_nested_map +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +PREHOOK: query: describe char_nested_map +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe char_nested_map +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +c1 map None +PREHOOK: query: select * from char_nested_map +PREHOOK: type: QUERY +PREHOOK: Input: default@char_nested_map +#### A masked pattern was here #### +POSTHOOK: query: select * from char_nested_map +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_nested_map +#### A masked pattern was here #### +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION 
[(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+{0:"val_0               "}
+PREHOOK: query: -- structs
+create table char_nested_struct (c1 struct<a:int, b:char(20), c:string>)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- structs
+create table char_nested_struct (c1 struct<a:int, b:char(20), c:string>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@char_nested_struct
+POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+PREHOOK: query: insert overwrite table char_nested_struct
+  select named_struct('a', key,
+         'b', value,
+         'c', cast(value as string))
+  from char_nested_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_nested_1
+PREHOOK: Output: default@char_nested_struct
+POSTHOOK: query: insert overwrite table char_nested_struct
+  select named_struct('a', key,
+         'b', value,
+         'c', cast(value as string))
+  from char_nested_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_nested_1
+POSTHOOK: Output: default@char_nested_struct
+POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+PREHOOK: query: describe char_nested_struct
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe char_nested_struct
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+c1	struct<a:int,b:char(20),c:string>	None
+PREHOOK: query: select * from char_nested_struct
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_nested_struct
+#### A masked pattern was here ####
+POSTHOOK: query: select * from char_nested_struct
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_nested_struct
+#### A masked pattern was here ####
+POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+{"a":0,"b":"val_0               ","c":"val_0"}
+PREHOOK: query: -- nested type with create table as
+create table char_nested_cta as
+  select * from char_nested_struct
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@char_nested_struct
+POSTHOOK: query: -- nested type with create table as
+create table char_nested_cta as
+  select * from char_nested_struct
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@char_nested_struct
+POSTHOOK: Output: default@char_nested_cta
+POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+PREHOOK: query: describe char_nested_cta
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe char_nested_cta
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+c1	struct<a:int,b:char(20),c:string>	None
+PREHOOK: query: select * from char_nested_cta
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_nested_cta
+#### A masked pattern was here ####
+POSTHOOK: query: select * from char_nested_cta
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_nested_cta
+#### A masked pattern was here ####
+POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+{"a":0,"b":"val_0               ","c":"val_0"}
+PREHOOK: query: -- nested type with view
+create table char_nested_view as
+  select * from char_nested_struct
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@char_nested_struct
+POSTHOOK: query: -- nested type with view
+create table char_nested_view as
+  select * from char_nested_struct
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@char_nested_struct
+POSTHOOK: Output: default@char_nested_view
+POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+PREHOOK: query: describe char_nested_view
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe char_nested_view
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ]
+c1	struct<a:int,b:char(20),c:string>	None
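
The struct rows in this file show field b padded to its full 20 characters while field c, the same value cast to string, stays stripped. That is the expected split: the JSON row rendering (see the SerDeUtils hunk later in this patch) prints CHAR through HiveChar.toString(), which returns the padded form, whereas a cast to string keeps only the stripped value. A minimal sketch of that behaviour, assuming the patch's common classes (plus commons-lang) are on the classpath; the demo class name is ours, not part of the patch:

    import org.apache.hadoop.hive.common.type.HiveChar;

    // Hypothetical demo, not part of the patch.
    public class CharPaddingDemo {
      public static void main(String[] args) {
        HiveChar b = new HiveChar("val_0", 20);
        // toString() returns the space-padded form used when rows are displayed:
        System.out.println("b=[" + b + "]");                    // b=[val_0               ]
        // the stripped form is what a cast to string preserves:
        System.out.println("c=[" + b.getStrippedValue() + "]"); // c=[val_0]
      }
    }

+PREHOOK: query: select * from char_nested_view
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_nested_view
+#### A masked pattern was here ####
+POSTHOOK: query: select * from char_nested_view
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_nested_view
+#### A masked pattern was here ####
+POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_nested_1.value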
EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +{"a":0,"b":"val_0 ","c":"val_0"} +PREHOOK: query: drop table char_nested_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_nested_1 +PREHOOK: Output: default@char_nested_1 +POSTHOOK: query: drop table char_nested_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_nested_1 +POSTHOOK: Output: default@char_nested_1 +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +PREHOOK: query: drop table char_nested_array +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_nested_array +PREHOOK: Output: default@char_nested_array +POSTHOOK: query: drop table char_nested_array +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_nested_array +POSTHOOK: Output: default@char_nested_array +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +PREHOOK: query: drop table char_nested_map +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_nested_map +PREHOOK: Output: default@char_nested_map +POSTHOOK: query: drop table char_nested_map +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_nested_map +POSTHOOK: Output: default@char_nested_map +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), 
comment:null), ] +POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +PREHOOK: query: drop table char_nested_struct +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_nested_struct +PREHOOK: Output: default@char_nested_struct +POSTHOOK: query: drop table char_nested_struct +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_nested_struct +POSTHOOK: Output: default@char_nested_struct +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +PREHOOK: query: drop table char_nested_cta +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_nested_cta +PREHOOK: Output: default@char_nested_cta +POSTHOOK: query: drop table char_nested_cta +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_nested_cta +POSTHOOK: Output: default@char_nested_cta +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +PREHOOK: query: drop table char_nested_view +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_nested_view +PREHOOK: Output: default@char_nested_view +POSTHOOK: query: drop table char_nested_view +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_nested_view +POSTHOOK: Output: default@char_nested_view +POSTHOOK: Lineage: char_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_nested_array.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_map.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, 
type:char(20), comment:null), ] +POSTHOOK: Lineage: char_nested_struct.c1 EXPRESSION [(char_nested_1)char_nested_1.FieldSchema(name:key, type:int, comment:null), (char_nested_1)char_nested_1.FieldSchema(name:value, type:char(20), comment:null), ] diff --git ql/src/test/results/clientpositive/char_udf1.q.out ql/src/test/results/clientpositive/char_udf1.q.out new file mode 100644 index 0000000..4843e8a --- /dev/null +++ ql/src/test/results/clientpositive/char_udf1.q.out @@ -0,0 +1,554 @@ +PREHOOK: query: drop table char_udf_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_udf_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_udf_1 (c1 string, c2 string, c3 char(10), c4 char(20)) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table char_udf_1 (c1 string, c2 string, c3 char(10), c4 char(20)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_udf_1 +PREHOOK: query: insert overwrite table char_udf_1 + select key, value, key, value from src where key = '238' limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@char_udf_1 +POSTHOOK: query: insert overwrite table char_udf_1 + select key, value, key, value from src where key = '238' limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@char_udf_1 +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- UDFs with char support +select + concat(c1, c2), + concat(c3, c4), + concat(c1, c2) = concat(c3, c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: -- UDFs with char support +select + concat(c1, c2), + concat(c3, c4), + concat(c1, c2) = concat(c3, c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238val_238 238val_238 true +PREHOOK: query: select + upper(c2), + upper(c4), + upper(c2) = upper(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + upper(c2), + upper(c4), + upper(c2) = upper(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +VAL_238 VAL_238 true 
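
Every char/string pair in char_udf1.q.out compares equal because the char value is stored with its trailing pad stripped, and the UDFs operate on that stripped form; padding is only reinstated on output. The same fact explains the results further down: length() reports 7 for the char(20) column, and compute_stats sees maxlength 7. A minimal sketch, assuming the HiveChar class added by this patch is on the classpath; the demo class name is ours, not part of the patch:

    import org.apache.hadoop.hive.common.type.HiveChar;

    // Hypothetical demo, not part of the patch.
    public class CharUdfDemo {
      public static void main(String[] args) {
        HiveChar c4 = new HiveChar("val_238", 20);
        // pad is not stored, so string functions see 7 characters, not 20:
        System.out.println(c4.getStrippedValue().length());          // 7
        // trailing spaces in the input are stripped too, and equals()
        // compares stripped values regardless of declared length:
        System.out.println(c4.equals(new HiveChar("val_238  ", 10))); // true
      }
    }
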
+PREHOOK: query: select + lower(c2), + lower(c4), + lower(c2) = lower(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + lower(c2), + lower(c4), + lower(c2) = lower(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 val_238 true +PREHOOK: query: -- Scalar UDFs +select + ascii(c2), + ascii(c4), + ascii(c2) = ascii(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: -- Scalar UDFs +select + ascii(c2), + ascii(c4), + ascii(c2) = ascii(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +118 118 true +PREHOOK: query: select + concat_ws('|', c1, c2), + concat_ws('|', c3, c4), + concat_ws('|', c1, c2) = concat_ws('|', c3, c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + concat_ws('|', c1, c2), + concat_ws('|', c3, c4), + concat_ws('|', c1, c2) = concat_ws('|', c3, c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238|val_238 238|val_238 true +PREHOOK: query: select + decode(encode(c2, 'US-ASCII'), 'US-ASCII'), + decode(encode(c4, 'US-ASCII'), 'US-ASCII'), + decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + decode(encode(c2, 'US-ASCII'), 'US-ASCII'), + decode(encode(c4, 'US-ASCII'), 'US-ASCII'), + decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 
+POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 val_238 true +PREHOOK: query: select + instr(c2, '_'), + instr(c4, '_'), + instr(c2, '_') = instr(c4, '_') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + instr(c2, '_'), + instr(c4, '_'), + instr(c2, '_') = instr(c4, '_') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +4 4 true +PREHOOK: query: select + length(c2), + length(c4), + length(c2) = length(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + length(c2), + length(c4), + length(c2) = length(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +7 7 true +PREHOOK: query: select + locate('a', 'abcdabcd', 3), + locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3), + locate('a', 'abcdabcd', 3) = locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + locate('a', 'abcdabcd', 3), + locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3), + locate('a', 'abcdabcd', 3) = locate(cast('a' as char(1)), cast('abcdabcd' as char(10)), 3) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +5 5 true +PREHOOK: query: select + lpad(c2, 15, ' '), + lpad(c4, 15, ' '), + lpad(c2, 15, ' ') = lpad(c4, 15, ' ') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + lpad(c2, 15, ' '), + lpad(c4, 15, ' '), + lpad(c2, 15, ' ') = lpad(c4, 15, ' ') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: 
char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] + val_238 val_238 true +PREHOOK: query: select + ltrim(c2), + ltrim(c4), + ltrim(c2) = ltrim(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + ltrim(c2), + ltrim(c4), + ltrim(c2) = ltrim(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 val_238 true +PREHOOK: query: select + regexp(c2, 'val'), + regexp(c4, 'val'), + regexp(c2, 'val') = regexp(c4, 'val') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp(c2, 'val'), + regexp(c4, 'val'), + regexp(c2, 'val') = regexp(c4, 'val') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +true true true +PREHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + regexp_extract(c2, 'val_([0-9]+)', 1), + regexp_extract(c4, 'val_([0-9]+)', 1), + regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 238 true +PREHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern 
was here #### +POSTHOOK: query: select + regexp_replace(c2, 'val', 'replaced'), + regexp_replace(c4, 'val', 'replaced'), + regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +replaced_238 replaced_238 true +PREHOOK: query: select + reverse(c2), + reverse(c4), + reverse(c2) = reverse(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + reverse(c2), + reverse(c4), + reverse(c2) = reverse(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +832_lav 832_lav true +PREHOOK: query: select + rpad(c2, 15, ' '), + rpad(c4, 15, ' '), + rpad(c2, 15, ' ') = rpad(c4, 15, ' ') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + rpad(c2, 15, ' '), + rpad(c4, 15, ' '), + rpad(c2, 15, ' ') = rpad(c4, 15, ' ') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 val_238 true +PREHOOK: query: select + rtrim(c2), + rtrim(c4), + rtrim(c2) = rtrim(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + rtrim(c2), + rtrim(c4), + rtrim(c2) = rtrim(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 val_238 true +PREHOOK: query: select + sentences('See spot run. See jane run.'), + sentences(cast('See spot run. See jane run.' 
as char(50))) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + sentences('See spot run. See jane run.'), + sentences(cast('See spot run. See jane run.' as char(50))) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +[["See","spot","run"],["See","jane","run"]] [["See","spot","run"],["See","jane","run"]] +PREHOOK: query: select + split(c2, '_'), + split(c4, '_') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + split(c2, '_'), + split(c4, '_') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +["val","238"] ["val","238"] +PREHOOK: query: select + str_to_map('a:1,b:2,c:3',',',':'), + str_to_map(cast('a:1,b:2,c:3' as char(20)),',',':') +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + str_to_map('a:1,b:2,c:3',',',':'), + str_to_map(cast('a:1,b:2,c:3' as char(20)),',',':') +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +{"b":"2","a":"1","c":"3"} {"b":"2","a":"1","c":"3"} +PREHOOK: query: select + substr(c2, 1, 3), + substr(c4, 1, 3), + substr(c2, 1, 3) = substr(c4, 1, 3) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + substr(c2, 1, 3), + substr(c4, 1, 3), + substr(c2, 1, 3) = substr(c4, 1, 3) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, 
type:string, comment:default), ] +val val true +PREHOOK: query: select + trim(c2), + trim(c4), + trim(c2) = trim(c4) +from char_udf_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + trim(c2), + trim(c4), + trim(c2) = trim(c4) +from char_udf_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 val_238 true +PREHOOK: query: -- Aggregate Functions +select + compute_stats(c2, 16), + compute_stats(c4, 16) +from char_udf_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: -- Aggregate Functions +select + compute_stats(c2, 16), + compute_stats(c4, 16) +from char_udf_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1} +PREHOOK: query: select + min(c2), + min(c4) +from char_udf_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + min(c2), + min(c4) +from char_udf_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 val_238 +PREHOOK: query: select + max(c2), + max(c4) +from char_udf_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: query: select + max(c2), + max(c4) +from char_udf_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_udf_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_238 val_238 +PREHOOK: query: drop table char_udf_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: 
default@char_udf_1 +PREHOOK: Output: default@char_udf_1 +POSTHOOK: query: drop table char_udf_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_udf_1 +POSTHOOK: Output: default@char_udf_1 +POSTHOOK: Lineage: char_udf_1.c1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c3 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: char_udf_1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/char_union1.q.out ql/src/test/results/clientpositive/char_union1.q.out new file mode 100644 index 0000000..f94c6cc --- /dev/null +++ ql/src/test/results/clientpositive/char_union1.q.out @@ -0,0 +1,157 @@ +PREHOOK: query: drop table char_union1_ch1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_union1_ch1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table char_union1_ch2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_union1_ch2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table char_union1_str +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table char_union1_str +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table char_union1_ch1 ( + c1 int, + c2 char(10) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table char_union1_ch1 ( + c1 int, + c2 char(10) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_union1_ch1 +PREHOOK: query: create table char_union1_ch2 ( + c1 int, + c2 char(20) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table char_union1_ch2 ( + c1 int, + c2 char(20) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_union1_ch2 +PREHOOK: query: create table char_union1_str ( + c1 int, + c2 string +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table char_union1_str ( + c1 int, + c2 string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_union1_str +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table char_union1_ch1 +PREHOOK: type: LOAD +PREHOOK: Output: default@char_union1_ch1 +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table char_union1_ch1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@char_union1_ch1 +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table char_union1_ch2 +PREHOOK: type: LOAD +PREHOOK: Output: default@char_union1_ch2 +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table char_union1_ch2 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@char_union1_ch2 +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table char_union1_str +PREHOOK: type: LOAD +PREHOOK: Output: default@char_union1_str +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table char_union1_str +POSTHOOK: type: LOAD +POSTHOOK: Output: default@char_union1_str +PREHOOK: query: -- union char with same length char +select * from ( + select * from char_union1_ch1 + union all + select * from char_union1_ch1 limit 1 +) q1 sort by c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_union1_ch1 +#### A masked pattern was here #### +POSTHOOK: query: -- union char with same length char +select * from ( + select * from char_union1_ch1 + union all + select * from char_union1_ch1 limit 1 +) q1 sort by c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_union1_ch1 +#### A masked pattern was here #### +1 abc +1 abc +2 
abc +3 abc +PREHOOK: query: -- union char with different length char +select * from ( + select * from char_union1_ch1 + union all + select * from char_union1_ch2 limit 1 +) q1 sort by c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_union1_ch1 +PREHOOK: Input: default@char_union1_ch2 +#### A masked pattern was here #### +POSTHOOK: query: -- union char with different length char +select * from ( + select * from char_union1_ch1 + union all + select * from char_union1_ch2 limit 1 +) q1 sort by c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_union1_ch1 +POSTHOOK: Input: default@char_union1_ch2 +#### A masked pattern was here #### +1 abc +1 abc +2 abc +3 abc +PREHOOK: query: -- union char with string +select * from ( + select * from char_union1_ch1 + union all + select * from char_union1_str limit 1 +) q1 sort by c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_union1_ch1 +PREHOOK: Input: default@char_union1_str +#### A masked pattern was here #### +POSTHOOK: query: -- union char with string +select * from ( + select * from char_union1_ch1 + union all + select * from char_union1_str limit 1 +) q1 sort by c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_union1_ch1 +POSTHOOK: Input: default@char_union1_str +#### A masked pattern was here #### +1 abc +1 abc +2 abc +3 abc +PREHOOK: query: drop table char_union1_ch1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_union1_ch1 +PREHOOK: Output: default@char_union1_ch1 +POSTHOOK: query: drop table char_union1_ch1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_union1_ch1 +POSTHOOK: Output: default@char_union1_ch1 +PREHOOK: query: drop table char_union1_ch2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_union1_ch2 +PREHOOK: Output: default@char_union1_ch2 +POSTHOOK: query: drop table char_union1_ch2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_union1_ch2 +POSTHOOK: Output: default@char_union1_ch2 +PREHOOK: query: drop table char_union1_str +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_union1_str +PREHOOK: Output: default@char_union1_str +POSTHOOK: query: drop table char_union1_str +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_union1_str +POSTHOOK: Output: default@char_union1_str diff --git ql/src/test/results/clientpositive/ctas_char.q.out ql/src/test/results/clientpositive/ctas_char.q.out new file mode 100644 index 0000000..3a172b8 --- /dev/null +++ ql/src/test/results/clientpositive/ctas_char.q.out @@ -0,0 +1,126 @@ +PREHOOK: query: drop table ctas_char_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ctas_char_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table ctas_char_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table ctas_char_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop view ctas_char_3 +PREHOOK: type: DROPVIEW +POSTHOOK: query: drop view ctas_char_3 +POSTHOOK: type: DROPVIEW +PREHOOK: query: create table ctas_char_1 (key char(10), value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table ctas_char_1 (key char(10), value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@ctas_char_1 +PREHOOK: query: insert overwrite table ctas_char_1 + select key, value from src sort by key, value limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@ctas_char_1 +POSTHOOK: query: insert overwrite table ctas_char_1 + select key, value from src sort by key, value limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@ctas_char_1 +POSTHOOK: Lineage: 
ctas_char_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- create table as with char column +create table ctas_char_2 as select key, value from ctas_char_1 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@ctas_char_1 +POSTHOOK: query: -- create table as with char column +create table ctas_char_2 as select key, value from ctas_char_1 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@ctas_char_1 +POSTHOOK: Output: default@ctas_char_2 +POSTHOOK: Lineage: ctas_char_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- view with char column +create view ctas_char_3 as select key, value from ctas_char_2 +PREHOOK: type: CREATEVIEW +POSTHOOK: query: -- view with char column +create view ctas_char_3 as select key, value from ctas_char_2 +POSTHOOK: type: CREATEVIEW +POSTHOOK: Output: default@ctas_char_3 +POSTHOOK: Lineage: ctas_char_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, value from ctas_char_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@ctas_char_1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from ctas_char_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ctas_char_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: ctas_char_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: select * from ctas_char_2 +PREHOOK: type: QUERY +PREHOOK: Input: default@ctas_char_2 +#### A masked pattern was here #### +POSTHOOK: query: select * from ctas_char_2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ctas_char_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: ctas_char_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: select * from ctas_char_3 +PREHOOK: type: QUERY +PREHOOK: Input: default@ctas_char_2 +PREHOOK: Input: default@ctas_char_3 +#### A masked pattern was here #### +POSTHOOK: query: select * from ctas_char_3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ctas_char_2 +POSTHOOK: Input: default@ctas_char_3 +#### A masked pattern was here #### +POSTHOOK: Lineage: ctas_char_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +PREHOOK: query: drop table ctas_char_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ctas_char_1 +PREHOOK: Output: default@ctas_char_1 +POSTHOOK: query: drop table ctas_char_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ctas_char_1 +POSTHOOK: Output: default@ctas_char_1 +POSTHOOK: Lineage: ctas_char_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: 
Lineage: ctas_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: drop table ctas_char_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@ctas_char_2 +PREHOOK: Output: default@ctas_char_2 +POSTHOOK: query: drop table ctas_char_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@ctas_char_2 +POSTHOOK: Output: default@ctas_char_2 +POSTHOOK: Lineage: ctas_char_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: drop view ctas_char_3 +PREHOOK: type: DROPVIEW +PREHOOK: Input: default@ctas_char_3 +PREHOOK: Output: default@ctas_char_3 +POSTHOOK: query: drop view ctas_char_3 +POSTHOOK: type: DROPVIEW +POSTHOOK: Input: default@ctas_char_3 +POSTHOOK: Output: default@ctas_char_3 +POSTHOOK: Lineage: ctas_char_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: ctas_char_1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] diff --git serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java index 2528f6b..ea007ee 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; @@ -255,6 +256,13 @@ static void buildJSONString(StringBuilder sb, Object o, ObjectInspector oi, Stri sb.append('"'); break; } + case CHAR: { + sb.append('"'); + sb.append(escapeString(((HiveCharObjectInspector) poi) + .getPrimitiveJavaObject(o).toString())); + sb.append('"'); + break; + } case VARCHAR: { sb.append('"'); sb.append(escapeString(((HiveVarcharObjectInspector) poi) diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java index df85961..4e28d98 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -57,6 +58,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
@@ -64,6 +66,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
@@ -282,6 +285,15 @@ static Object deserialize(InputByteBuffer buffer, TypeInfo type,
       return deserializeText(buffer, invert, r);
     }

+    case CHAR: {
+      HiveCharWritable r =
+          reuse == null ? new HiveCharWritable() : (HiveCharWritable) reuse;
+      // Use internal text member to read value
+      deserializeText(buffer, invert, r.getTextValue());
+      r.enforceMaxLength(getCharacterMaxLength(type));
+      return r;
+    }
+
     case VARCHAR: {
       HiveVarcharWritable r =
           reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse;
@@ -289,7 +301,7 @@ static Object deserialize(InputByteBuffer buffer, TypeInfo type,
       deserializeText(buffer, invert, r.getTextValue());
       // If we cache helper data for deserialization we could avoid having
       // to call getVarcharMaxLength() on every deserialize call.
-      r.enforceMaxLength(getVarcharMaxLength(type));
+      r.enforceMaxLength(getCharacterMaxLength(type));
       return r;
     }
@@ -525,8 +537,8 @@ private static int deserializeInt(InputByteBuffer buffer, boolean invert) throws
     return v;
   }

-  static int getVarcharMaxLength(TypeInfo type) {
-    return ((VarcharTypeInfo)type).getLength();
+  static int getCharacterMaxLength(TypeInfo type) {
+    return ((BaseCharTypeInfo)type).getLength();
   }

   static Text deserializeText(InputByteBuffer buffer, boolean invert, Text r)
@@ -689,8 +701,15 @@ static void serialize(OutputByteBuffer buffer, Object o, ObjectInspector oi,
       Text t = soi.getPrimitiveWritableObject(o);
       serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
       return;
-    }
+    }
+    case CHAR: {
+      HiveCharObjectInspector hcoi = (HiveCharObjectInspector) poi;
+      HiveCharWritable hc = hcoi.getPrimitiveWritableObject(o);
+      Text t = hc.getTextValue();
+      serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
+      return;
+    }
     case VARCHAR: {
       HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector)poi;
       HiveVarcharWritable hc = hcoi.getPrimitiveWritableObject(o);
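
BinarySortableSerDe writes only the writable's stripped bytes (getTextValue()) and re-applies enforceMaxLength() on read, so the binary-sortable order for char agrees with HiveCharWritable's own comparison and can never be influenced by padding; getVarcharMaxLength() is generalized to getCharacterMaxLength() over BaseCharTypeInfo so a single helper serves both char and varchar. A small sketch of the invariant this relies on, assuming the patch's classes and their dependencies are on the classpath; the demo class name is ours, not part of the patch:

    import org.apache.hadoop.hive.common.type.HiveChar;
    import org.apache.hadoop.hive.serde2.io.HiveCharWritable;

    // Hypothetical demo, not part of the patch.
    public class CharSortDemo {
      public static void main(String[] args) {
        HiveCharWritable a = new HiveCharWritable(new HiveChar("abc", 10));
        HiveCharWritable b = new HiveCharWritable(new HiveChar("abc  ", 10));
        // identical stripped bytes -> identical serialized form and sort position:
        System.out.println(a.getTextValue()); // abc
        System.out.println(a.equals(b));      // true
      }
    }

diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java
new file mode 100644
index 0000000..934087f
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java
@@ -0,0 +1,48 @@
+package org.apache.hadoop.hive.serde2.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hive.common.util.HiveStringUtils;
+
+public abstract class HiveBaseCharWritable {
+  protected Text value = new Text();
+  transient protected int characterLength = -1;
+
+  public HiveBaseCharWritable() {
+  }
+
+  public int getCharacterLength() {
+    return HiveStringUtils.getTextUtfLength(value);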
+ } + + /** + * Access to the internal Text member. Use with care. + * @return + */ + public Text getTextValue() { + return value; + } + + public void readFields(DataInput in) throws IOException { + characterLength = -1; + value.readFields(in); + } + + public void write(DataOutput out) throws IOException { + value.write(out); + } + + public boolean equals(Object obj) { + if (obj == null || (obj.getClass() != this.getClass())) { + return false; + } + return value.equals(((HiveBaseCharWritable)obj).value); + } + + public int hashCode() { + return value.hashCode(); + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java new file mode 100644 index 0000000..e38b81c --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java @@ -0,0 +1,113 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.io; + +import java.io.DataInput; +import java.io.IOException; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.type.HiveBaseChar; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.shims.ShimLoader; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; + +public class HiveCharWritable extends HiveBaseCharWritable + implements WritableComparable { + + transient protected int maxLength = -1; + + public HiveCharWritable() { + } + + public HiveCharWritable(HiveChar hc) { + set(hc); + } + + public HiveCharWritable(HiveCharWritable hcw) { + set(hcw); + } + + public void set(HiveChar val) { + set(val.getValue(), val.getMaxLength()); + } + + public void set(String val) { + set(val, -1); + } + + public void set(HiveCharWritable val) { + maxLength = val.maxLength; + characterLength = val.characterLength; + value.set(val.value); + } + + public void set(HiveCharWritable val, int maxLength) { + if (val.maxLength >= maxLength) { + // Stripped value would be the same as val. 
+ value.set(val.value); + this.characterLength = val.characterLength; + this.maxLength = maxLength; + } else { + set(val.getHiveChar(), maxLength); + } + } + + public void set(HiveChar val, int len) { + set(val.getValue(), len); + } + + public void set(String val, int maxLength) { + this.maxLength = maxLength; + characterLength = -1; + value.set(HiveBaseChar.enforceMaxLength(StringUtils.stripEnd(val, " "), maxLength)); + } + + public HiveChar getHiveChar() { + return new HiveChar(value.toString(), maxLength); + } + + public void enforceMaxLength(int maxLength) { + set(getHiveChar(), maxLength); + } + + public Text getStrippedValue() { + // internal representation should already be stripped of trailing spaces + return getTextValue(); + } + + public Text getPaddedValue() { + return new Text(getHiveChar().getPaddedValue()); + } + + public void readFields(DataInput in) throws IOException { + super.readFields(in); + maxLength = -1; + // Need to make sure the string value has trailing spaces stripped + value.set(getHiveChar().getStrippedValue()); + } + + public int compareTo(HiveCharWritable rhs) { + return ShimLoader.getHadoopShims().compareText(value, rhs.value); + } + + @Override + public String toString() { + return getHiveChar().toString(); + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java index c609cd8..563a304 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java @@ -27,9 +27,8 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; -public class HiveVarcharWritable implements WritableComparable{ - protected Text value = new Text(); - transient protected int characterLength = -1; +public class HiveVarcharWritable extends HiveBaseCharWritable + implements WritableComparable{ public HiveVarcharWritable() { } @@ -76,52 +75,17 @@ public HiveVarchar getHiveVarchar() { return new HiveVarchar(value.toString(), -1); } - public int getCharacterLength() { - if (characterLength < 0) { - characterLength = getHiveVarchar().getCharacterLength(); - } - return characterLength; - } - public void enforceMaxLength(int maxLength) { // Might be possible to truncate the existing Text value, for now just do something simple. set(getHiveVarchar(), maxLength); } - public void readFields(DataInput in) throws IOException { - characterLength = -1; - value.readFields(in); - } - - public void write(DataOutput out) throws IOException { - value.write(out); - } - public int compareTo(HiveVarcharWritable rhs) { return ShimLoader.getHadoopShims().compareText(value, rhs.value); } - public boolean equals(Object obj) { - if (obj == null || !(obj instanceof HiveVarcharWritable)) { - return false; - } - return value.equals(((HiveVarcharWritable)obj).value); - } - @Override public String toString() { return value.toString(); } - - public int hashCode() { - return value.hashCode(); - } - - /** - * Access to the internal Text member. Use with care. 
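HiveCharWritable's contract, then, is that the internal Text is always stored stripped of trailing spaces, and the declared length is re-applied whenever a padded form is requested. A minimal, self-contained sketch of that behavior (the sketch class name is invented for illustration; it only exercises the methods defined above):

import org.apache.hadoop.hive.serde2.io.HiveCharWritable;

public class HiveCharWritableSketch {
  public static void main(String[] args) {
    HiveCharWritable hcw = new HiveCharWritable();
    hcw.set("abc  ", 5);                         // trailing spaces are stripped on set()
    System.out.println(hcw.getStrippedValue());  // abc   (internal Text, no padding)
    System.out.println(hcw.getPaddedValue());    // "abc  ", re-padded to maxLength 5
    System.out.println(hcw);                     // toString() pads as well: "abc  "
    hcw.enforceMaxLength(2);                     // shrink the declared char length
    System.out.println(hcw);                     // "ab"
  }
}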
- * @return - */ - public Text getTextValue() { - return value; - } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java index 12baa34..66f79ed 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDateObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyFloatObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveCharObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector; @@ -113,6 +114,8 @@ return new LazyDouble((LazyDoubleObjectInspector) oi); case STRING: return new LazyString((LazyStringObjectInspector) oi); + case CHAR: + return new LazyHiveChar((LazyHiveCharObjectInspector) oi); case VARCHAR: return new LazyHiveVarchar((LazyHiveVarcharObjectInspector) oi); case DATE: diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java new file mode 100644 index 0000000..ef469eb --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazy; + +import java.nio.charset.CharacterCodingException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.io.Text; + +/** + * LazyObject for storing a value of HiveChar. 
+ * + */ +public class LazyHiveChar extends + LazyPrimitive { + + private static final Log LOG = LogFactory.getLog(LazyHiveChar.class); + + protected int maxLength = -1; + + public LazyHiveChar(LazyHiveCharObjectInspector oi) { + super(oi); + maxLength = ((CharTypeInfo)oi.getTypeInfo()).getLength(); + data = new HiveCharWritable(); + } + + public LazyHiveChar(LazyHiveChar copy) { + super(copy); + this.maxLength = copy.maxLength; + data = new HiveCharWritable(copy.data); + } + + public void setValue(LazyHiveChar copy) { + data.set(copy.data, maxLength); + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + String byteData = null; + try { + byteData = Text.decode(bytes.getData(), start, length); + data.set(byteData, maxLength); + isNull = false; + } catch (CharacterCodingException e) { + isNull = true; + LOG.debug("Data not in the HiveChar data type range so converted to null.", e); + } + } + +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java index f2e7d70..50d3d1e 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -40,6 +41,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; @@ -226,7 +228,14 @@ public static void writePrimitiveUTF8(OutputStream out, Object o, needsEscape); break; } - + case CHAR: { + HiveCharWritable hc = ((HiveCharObjectInspector) oi).getPrimitiveWritableObject(o); + // char is holding stripped value, and we need to write padded value here. + Text t = new Text(hc.toString()); + writeEscaped(out, t.getBytes(), 0, t.getLength(), escaped, escapeChar, + needsEscape); + break; + } case VARCHAR: { HiveVarcharWritable hc = ((HiveVarcharObjectInspector)oi).getPrimitiveWritableObject(o); Text t = hc.getTextValue(); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveCharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveCharObjectInspector.java new file mode 100644 index 0000000..65fb1ab --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveCharObjectInspector.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
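For the lazy (text) path, init() decodes the field bytes as UTF-8 and then applies the declared char length, falling back to null on a malformed encoding. A hedged sketch of driving LazyHiveChar by hand, assuming the TypeInfoFactory.getCharTypeInfo() helper added later in this patch (the sketch class name is invented):

import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazy.LazyHiveChar;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveCharObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class LazyHiveCharSketch {
  public static void main(String[] args) throws Exception {
    LazyHiveCharObjectInspector oi =
        new LazyHiveCharObjectInspector(TypeInfoFactory.getCharTypeInfo(10));
    LazyHiveChar lazyChar = new LazyHiveChar(oi);

    ByteArrayRef ref = new ByteArrayRef();
    ref.setData("hello".getBytes("UTF-8"));
    lazyChar.init(ref, 0, 5);  // Text.decode() + set(value, maxLength = 10)

    // The writable holds "hello" stripped; padding is reconstructed on demand.
    System.out.println(lazyChar.getWritableObject().getPaddedValue()); // "hello     "
  }
}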
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive; + + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveChar; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharUtils; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; + +public class LazyHiveCharObjectInspector + extends AbstractPrimitiveLazyObjectInspector + implements HiveCharObjectInspector { + + // no-arg ctor required for Kryo + public LazyHiveCharObjectInspector() { + } + + public LazyHiveCharObjectInspector(CharTypeInfo typeInfo) { + super(typeInfo); + } + + @Override + public Object copyObject(Object o) { + if (o == null) { + return null; + } + + LazyHiveChar ret = new LazyHiveChar(this); + ret.setValue((LazyHiveChar) o); + return ret; + } + + @Override + public HiveChar getPrimitiveJavaObject(Object o) { + if (o == null) { + return null; + } + + HiveChar ret = ((LazyHiveChar) o).getWritableObject().getHiveChar(); + if (!BaseCharUtils.doesPrimitiveMatchTypeParams( + ret, (CharTypeInfo)typeInfo)) { + HiveChar newValue = new HiveChar(ret, ((CharTypeInfo)typeInfo).getLength()); + return newValue; + } + return ret; + } + + @Override + public String toString() { + return getTypeName(); + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveVarcharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveVarcharObjectInspector.java index 2267e83..c802ed0 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveVarcharObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveVarcharObjectInspector.java @@ -23,7 +23,7 @@ import org.apache.hadoop.hive.serde2.lazy.LazyHiveVarchar; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharUtils; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharUtils; public class LazyHiveVarcharObjectInspector extends AbstractPrimitiveLazyObjectInspector @@ -55,7 +55,7 @@ public HiveVarchar getPrimitiveJavaObject(Object o) { } HiveVarchar ret = ((LazyHiveVarchar) o).getWritableObject().getHiveVarchar(); - if (!VarcharUtils.doesPrimitiveMatchTypeParams( + if (!BaseCharUtils.doesPrimitiveMatchTypeParams( ret, (VarcharTypeInfo)typeInfo)) { HiveVarchar newValue = new HiveVarchar(ret, ((VarcharTypeInfo)typeInfo).getLength()); return newValue; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java 
serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java index 7cf4e2f..5f64697 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -122,6 +123,9 @@ private LazyPrimitiveObjectInspectorFactory() { // Object inspector hasn't been cached for this type/params yet, create now switch (typeInfo.getPrimitiveCategory()) { + case CHAR: + poi = new LazyHiveCharObjectInspector((CharTypeInfo) typeInfo); + break; case VARCHAR: poi = new LazyHiveVarcharObjectInspector((VarcharTypeInfo)typeInfo); break; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java index c3d31f1..cae4faa 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector; @@ -72,6 +73,8 @@ return new LazyBinaryDouble((WritableDoubleObjectInspector) oi); case STRING: return new LazyBinaryString((WritableStringObjectInspector) oi); + case CHAR: + return new LazyBinaryHiveChar((WritableHiveCharObjectInspector) oi); case VARCHAR: return new LazyBinaryHiveVarchar((WritableHiveVarcharObjectInspector) oi); case VOID: // for NULL diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveChar.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveChar.java new file mode 100644 index 0000000..b64a3b4 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveChar.java @@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.io.Text; + +public class LazyBinaryHiveChar extends + LazyBinaryPrimitive { + + protected int maxLength = -1; + + LazyBinaryHiveChar(WritableHiveCharObjectInspector oi) { + super(oi); + maxLength = ((CharTypeInfo)oi.getTypeInfo()).getLength(); + data = new HiveCharWritable(); + } + + LazyBinaryHiveChar(LazyBinaryHiveChar copy) { + super(copy); + maxLength = copy.maxLength; + data = new HiveCharWritable(copy.data); + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + // re-use existing text member in char writable + Text textValue = data.getTextValue(); + textValue.set(bytes.getData(), start, length); + data.enforceMaxLength(maxLength); + } + +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java index 847bd11..18032d3 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; @@ -369,6 +370,12 @@ public static boolean serialize(Output byteStream, Object obj, serializeText(byteStream, t, skipLengthPrefix); return warnedOnceNullMapKey; } + case CHAR: { + HiveCharObjectInspector hcoi = (HiveCharObjectInspector) poi; + Text t = hcoi.getPrimitiveWritableObject(obj).getTextValue(); + serializeText(byteStream, t, skipLengthPrefix); + return warnedOnceNullMapKey; + } case VARCHAR: { HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) poi; Text t = hcoi.getPrimitiveWritableObject(obj).getTextValue(); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java index f8a44b7..c583ae2 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java @@ -195,7 +195,7 @@ public static void checkObjectByteInfo(ObjectInspector objectInspector, recordInfo.elementOffset = vInt.length; recordInfo.elementSize = vInt.value; break; - + case CHAR: case VARCHAR: 
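The lazy-binary path works the same way: LazyBinaryHiveChar.init() above reuses the writable's internal Text for the raw bytes, then enforceMaxLength() re-establishes the char semantics, so the serialized bytes are the stripped value and the fixed-width padding is recovered on read. The equivalent in plain code (illustrative only, not part of the patch):

import org.apache.hadoop.hive.serde2.io.HiveCharWritable;

public class LazyBinaryHiveCharSketch {
  public static void main(String[] args) {
    HiveCharWritable data = new HiveCharWritable();
    // Raw bytes as LazyBinaryHiveChar.init() receives them: the stripped value.
    data.getTextValue().set("abc".getBytes(), 0, 3);
    data.enforceMaxLength(5);                  // re-apply char(5) semantics
    System.out.println(data.getPaddedValue()); // "abc  "
  }
}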
LazyBinaryUtils.readVInt(bytes, offset, vInt); recordInfo.elementOffset = vInt.length; diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java index 06440ec..8a42577 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableIntObjectInspector; @@ -103,6 +104,10 @@ private static Converter getConverter(PrimitiveObjectInspector inputOI, return new PrimitiveObjectInspectorConverter.StringConverter( inputOI); } + case CHAR: + return new PrimitiveObjectInspectorConverter.HiveCharConverter( + inputOI, + (SettableHiveCharObjectInspector) outputOI); case VARCHAR: return new PrimitiveObjectInspectorConverter.HiveVarcharConverter( inputOI, diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index d0fa976..ba8342d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -31,6 +31,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -43,6 +44,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; @@ -55,6 +57,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveCharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveDecimalObjectInspector; import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableIntObjectInspector; @@ -503,6 +506,8 @@ public static int hashCode(Object o, ObjectInspector objIns) { } return r; } + case CHAR: + return ((HiveCharObjectInspector) poi).getPrimitiveWritableObject(o).hashCode(); case VARCHAR: return ((HiveVarcharObjectInspector)poi).getPrimitiveWritableObject(o).hashCode(); case BINARY: @@ -700,6 +705,11 @@ public static int compare(Object o1, ObjectInspector oi1, Object o2, .compareTo(s2)); } } + case CHAR: { + HiveCharWritable t1 = ((HiveCharObjectInspector)poi1).getPrimitiveWritableObject(o1); + HiveCharWritable t2 = ((HiveCharObjectInspector)poi2).getPrimitiveWritableObject(o2); + return t1.compareTo(t2); + } case VARCHAR: { HiveVarcharWritable t1 = ((HiveVarcharObjectInspector)poi1).getPrimitiveWritableObject(o1); HiveVarcharWritable t2 = ((HiveVarcharObjectInspector)poi2).getPrimitiveWritableObject(o2); @@ -1048,6 +1058,8 @@ private static boolean isInstanceOfSettablePrimitiveOI(PrimitiveObjectInspector case STRING: return oi instanceof WritableStringObjectInspector || oi instanceof JavaStringObjectInspector; + case CHAR: + return oi instanceof SettableHiveCharObjectInspector; case VARCHAR: return oi instanceof SettableHiveVarcharObjectInspector; case DATE: diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java index 22e5ec5..da5416a 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java @@ -31,7 +31,7 @@ */ public static enum PrimitiveCategory { VOID, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, - DATE, TIMESTAMP, BINARY, DECIMAL, VARCHAR, UNKNOWN + DATE, TIMESTAMP, BINARY, DECIMAL, VARCHAR, CHAR, UNKNOWN }; public PrimitiveTypeInfo getTypeInfo(); diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/HiveCharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/HiveCharObjectInspector.java new file mode 100644 index 0000000..157dac3 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/HiveCharObjectInspector.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
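Both the hashCode() and compare() additions here delegate to HiveCharWritable, which hashes and compares the stripped Text. The practical consequence is that equal char values hash and compare the same regardless of their declared lengths. A small sketch (compareTo() goes through ShimLoader, so this assumes the Hadoop shims are on the classpath; the class name is invented):

import org.apache.hadoop.hive.serde2.io.HiveCharWritable;

public class CharComparisonSketch {
  public static void main(String[] args) {
    HiveCharWritable a = new HiveCharWritable();
    HiveCharWritable b = new HiveCharWritable();
    a.set("abc", 5);   // stored as "abc", padded form "abc  "
    b.set("abc", 10);  // stored as "abc", padded form "abc       "
    System.out.println(a.equals(b));                  // true - same stripped Text
    System.out.println(a.hashCode() == b.hashCode()); // true
    System.out.println(a.compareTo(b));               // 0
  }
}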
+ */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; + +public interface HiveCharObjectInspector extends PrimitiveObjectInspector { + HiveCharWritable getPrimitiveWritableObject(Object o); + + HiveChar getPrimitiveJavaObject(Object o); +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaHiveCharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaHiveCharObjectInspector.java new file mode 100644 index 0000000..ff114c0 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaHiveCharObjectInspector.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharUtils; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; + +public class JavaHiveCharObjectInspector extends AbstractPrimitiveJavaObjectInspector + implements SettableHiveCharObjectInspector { + + // no-arg ctor required for Kryo serialization + public JavaHiveCharObjectInspector() { + } + + public JavaHiveCharObjectInspector(CharTypeInfo typeInfo) { + super(typeInfo); + } + + public HiveChar getPrimitiveJavaObject(Object o) { + if (o == null) { + return null; + } + HiveChar value = (HiveChar) o; + if (BaseCharUtils.doesPrimitiveMatchTypeParams(value, (CharTypeInfo) typeInfo)) { + return value; + } + // value needs to be converted to match type params + return getPrimitiveWithParams(value); + } + + public HiveCharWritable getPrimitiveWritableObject(Object o) { + if (o == null) { + return null; + } + return getWritableWithParams((HiveChar) o); + } + + private HiveChar getPrimitiveWithParams(HiveChar val) { + HiveChar hc = new HiveChar(val, getMaxLength()); + return hc; + } + + private HiveCharWritable getWritableWithParams(HiveChar val) { + HiveCharWritable hcw = new HiveCharWritable(); + hcw.set(val, getMaxLength()); + return hcw; + } + + public Object set(Object o, HiveChar value) { + HiveChar setValue = (HiveChar) o; + setValue.setValue(value, getMaxLength()); + return setValue; + } + + public Object set(Object o, String value) { + HiveChar setValue = (HiveChar) o; + setValue.setValue(value, getMaxLength()); + return setValue; + } + + public Object create(HiveChar value) { + HiveChar hc = new HiveChar(value, getMaxLength()); + return hc; + } + + public int getMaxLength() { + CharTypeInfo ti = (CharTypeInfo) 
typeInfo; + return ti.getLength(); + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaHiveVarcharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaHiveVarcharObjectInspector.java index 590d04b..24684ef 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaHiveVarcharObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaHiveVarcharObjectInspector.java @@ -20,7 +20,7 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharUtils; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharUtils; public class JavaHiveVarcharObjectInspector extends AbstractPrimitiveJavaObjectInspector implements SettableHiveVarcharObjectInspector { @@ -39,7 +39,7 @@ public HiveVarchar getPrimitiveJavaObject(Object o) { return null; } HiveVarchar value = (HiveVarchar)o; - if (VarcharUtils.doesPrimitiveMatchTypeParams( + if (BaseCharUtils.doesPrimitiveMatchTypeParams( value, (VarcharTypeInfo)typeInfo)) { return value; } @@ -69,7 +69,7 @@ private HiveVarcharWritable getWritableWithParams(HiveVarchar val) { @Override public Object set(Object o, HiveVarchar value) { HiveVarchar setValue = (HiveVarchar)o; - if (VarcharUtils.doesPrimitiveMatchTypeParams( + if (BaseCharUtils.doesPrimitiveMatchTypeParams( value, (VarcharTypeInfo)typeInfo)) { setValue.setValue(value); } else { diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java index 50de06a..06d5c5e 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java @@ -21,9 +21,11 @@ import java.sql.Date; import java.sql.Timestamp; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.lazy.LazyInteger; import org.apache.hadoop.hive.serde2.lazy.LazyLong; @@ -391,6 +393,16 @@ public Text convert(Object input) { t.set(((StringObjectInspector) inputOI).getPrimitiveJavaObject(input)); } return t; + case CHAR: + // when converting from char, the value should be stripped of any trailing spaces. 
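The CHAR branch of the Text converter above lands in the common string path, so a char arriving at a string destination loses its padding. A sketch of that conversion end to end, using the factory entries this patch registers for CHAR (the sketch class name is invented; the factory calls are the standard object inspector factories):

import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class CharToStringConverterSketch {
  public static void main(String[] args) {
    ObjectInspectorConverters.Converter conv = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
            TypeInfoFactory.getCharTypeInfo(5)),
        PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    // char(5) "abc" converts to the Text "abc": the pad spaces are stripped.
    System.out.println(conv.convert(new HiveChar("abc", 5)));
  }
}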
+ if (inputOI.preferWritable()) { + // char text value is already stripped of trailing space + t.set(((HiveCharObjectInspector) inputOI).getPrimitiveWritableObject(input) + .getStrippedValue()); + } else { + t.set(((HiveCharObjectInspector) inputOI).getPrimitiveJavaObject(input).getStrippedValue()); + } + return t; case VARCHAR: if (inputOI.preferWritable()) { t.set(((HiveVarcharObjectInspector) inputOI).getPrimitiveWritableObject(input) @@ -470,4 +482,29 @@ public Object convert(Object input) { } } + + public static class HiveCharConverter implements Converter { + PrimitiveObjectInspector inputOI; + SettableHiveCharObjectInspector outputOI; + HiveCharWritable hc; + + public HiveCharConverter(PrimitiveObjectInspector inputOI, + SettableHiveCharObjectInspector outputOI) { + this.inputOI = inputOI; + this.outputOI = outputOI; + hc = new HiveCharWritable(); + } + + @Override + public Object convert(Object input) { + switch (inputOI.getPrimitiveCategory()) { + case BOOLEAN: + return outputOI.set(hc, + ((BooleanObjectInspector) inputOI).get(input) ? + new HiveChar("TRUE", -1) : new HiveChar("FALSE", -1)); + default: + return outputOI.set(hc, PrimitiveObjectInspectorUtils.getHiveChar(input, inputOI)); + } + } + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java index efb7303..e6b2ec4 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -34,6 +35,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -247,6 +249,9 @@ public static AbstractPrimitiveWritableObjectInspector getPrimitiveWritableObjec } switch (typeInfo.getPrimitiveCategory()) { + case CHAR: + result = new WritableHiveCharObjectInspector((CharTypeInfo) typeInfo); + break; case VARCHAR: result = new WritableHiveVarcharObjectInspector((VarcharTypeInfo)typeInfo); break; @@ -287,6 +292,9 @@ public static ConstantObjectInspector getPrimitiveWritableConstantObjectInspecto return new WritableConstantDoubleObjectInspector((DoubleWritable)value); case STRING: return new WritableConstantStringObjectInspector((Text)value); + case CHAR: + return new WritableConstantHiveCharObjectInspector((CharTypeInfo) typeInfo, + (HiveCharWritable) value); case VARCHAR: return new WritableConstantHiveVarcharObjectInspector((VarcharTypeInfo)typeInfo, (HiveVarcharWritable)value); @@ -336,6 +344,9 @@ public static 
AbstractPrimitiveJavaObjectInspector getPrimitiveJavaObjectInspect } switch (typeInfo.getPrimitiveCategory()) { + case CHAR: + result = new JavaHiveCharObjectInspector((CharTypeInfo) typeInfo); + break; case VARCHAR: result = new JavaHiveVarcharObjectInspector((VarcharTypeInfo)typeInfo); break; diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java index a931578..5ccacf1 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java @@ -29,12 +29,14 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -225,6 +227,9 @@ static void registerType(PrimitiveTypeEntry t) { public static final PrimitiveTypeEntry varcharTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.VARCHAR, serdeConstants.VARCHAR_TYPE_NAME, null, HiveVarchar.class, HiveVarcharWritable.class); + public static final PrimitiveTypeEntry charTypeEntry = new PrimitiveTypeEntry( + PrimitiveCategory.CHAR, serdeConstants.CHAR_TYPE_NAME, null, HiveChar.class, + HiveCharWritable.class); // The following is a complex type for special handling public static final PrimitiveTypeEntry unknownTypeEntry = new PrimitiveTypeEntry( @@ -233,6 +238,7 @@ static void registerType(PrimitiveTypeEntry t) { static { registerType(binaryTypeEntry); registerType(stringTypeEntry); + registerType(charTypeEntry); registerType(varcharTypeEntry); registerType(booleanTypeEntry); registerType(intTypeEntry); @@ -404,6 +410,10 @@ public static boolean comparePrimitiveObjects(Object o1, .getPrimitiveWritableObject(o2); return t1.equals(t2); } + case CHAR: { + return ((HiveCharObjectInspector)oi1).getPrimitiveWritableObject(o1) + .equals(((HiveCharObjectInspector)oi2).getPrimitiveWritableObject(o2)); + } case VARCHAR: { return ((HiveVarcharObjectInspector)oi1).getPrimitiveWritableObject(o1) .equals(((HiveVarcharObjectInspector)oi2).getPrimitiveWritableObject(o2)); @@ -611,6 +621,7 @@ public static int getInt(Object o, PrimitiveObjectInspector oi) { } break; } + case CHAR: case VARCHAR: { result = Integer.parseInt(getString(o, oi)); break; @@ -674,6 +685,7 @@ public static long getLong(Object o, PrimitiveObjectInspector oi) { result = Long.parseLong(s); } break; + case CHAR: case VARCHAR: { result = Long.parseLong(getString(o, oi)); break; @@ -731,6 +743,7 @@ public static double getDouble(Object o, PrimitiveObjectInspector oi) { String s = soi.getPrimitiveJavaObject(o); result = Double.parseDouble(s); break; + case CHAR: case VARCHAR: result = Double.parseDouble(getString(o, oi)); break; @@ -809,6 +822,10 @@ public static String getString(Object o, PrimitiveObjectInspector oi) { StringObjectInspector 
soi = (StringObjectInspector) oi; result = soi.getPrimitiveJavaObject(o); break; + case CHAR: + // when converting from char to string/varchar, strip any trailing spaces + result = ((HiveCharObjectInspector) oi).getPrimitiveJavaObject(o).getStrippedValue(); + break; case VARCHAR: HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) oi; result = hcoi.getPrimitiveJavaObject(o).toString(); @@ -830,6 +847,25 @@ public static String getString(Object o, PrimitiveObjectInspector oi) { return result; } + public static HiveChar getHiveChar(Object o, PrimitiveObjectInspector oi) { + if (o == null) { + return null; + } + + HiveChar result = null; + switch (oi.getPrimitiveCategory()) { + case CHAR: + result = ((HiveCharObjectInspector) oi).getPrimitiveJavaObject(o); + break; + default: + // No char length available, copy whole string value here. + result = new HiveChar(); + result.setValue(getString(o, oi)); + break; + } + return result; + } + public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi) { if (o == null) { @@ -852,6 +888,12 @@ public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi) return result; } + public static BytesWritable getBinaryFromText(Text text) { + BytesWritable bw = new BytesWritable(); + bw.set(text.getBytes(), 0, text.getLength()); + return bw; + } + public static BytesWritable getBinary(Object o, PrimitiveObjectInspector oi) { if (null == o) { @@ -865,9 +907,14 @@ public static BytesWritable getBinary(Object o, PrimitiveObjectInspector oi) { case STRING: Text text = ((StringObjectInspector) oi).getPrimitiveWritableObject(o); - BytesWritable bw = new BytesWritable(); - bw.set(text.getBytes(), 0, text.getLength()); - return bw; + return getBinaryFromText(text); + case CHAR: + // char to binary conversion uses the padded value, so trailing spaces are included 
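Note the asymmetry: char keeps its padding through getBinary(), while varchar and the string-group conversions use the unpadded form. An illustrative sketch of the char case (the sketch class name is invented):

import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.BytesWritable;

public class CharToBinarySketch {
  public static void main(String[] args) {
    PrimitiveObjectInspector charOI =
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
            TypeInfoFactory.getCharTypeInfo(5));
    HiveCharWritable hcw = new HiveCharWritable();
    hcw.set("abc", 5);
    BytesWritable bw = PrimitiveObjectInspectorUtils.getBinary(hcw, charOI);
    System.out.println(bw.getLength()); // 5 - the bytes of the padded value "abc  "
  }
}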
+ return getBinaryFromText( + ((HiveCharObjectInspector) oi).getPrimitiveWritableObject(o).getPaddedValue()); + case VARCHAR: + return getBinaryFromText( + ((HiveVarcharObjectInspector) oi).getPrimitiveWritableObject(o).getTextValue()); case BINARY: return ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o); @@ -915,6 +962,7 @@ public static HiveDecimal getHiveDecimal(Object o, PrimitiveObjectInspector oi) case STRING: result = HiveDecimal.create(((StringObjectInspector) oi).getPrimitiveJavaObject(o)); break; + case CHAR: case VARCHAR: result = HiveDecimal.create(getString(o, oi)); break; @@ -953,6 +1001,7 @@ public static Date getDate(Object o, PrimitiveObjectInspector oi) { result = null; } break; + case CHAR: case VARCHAR: { try { String val = getString(o, oi).trim(); @@ -1017,6 +1066,7 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi) { String s = soi.getPrimitiveJavaObject(o); result = getTimestampFromString(s); break; + case CHAR: case VARCHAR: result = getTimestampFromString(getString(o, oi)); break; @@ -1089,6 +1139,7 @@ public static PrimitiveGrouping getPrimitiveGrouping(PrimitiveCategory primitive case DECIMAL: return PrimitiveGrouping.NUMERIC_GROUP; case STRING: + case CHAR: case VARCHAR: return PrimitiveGrouping.STRING_GROUP; case BOOLEAN: diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/SettableHiveCharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/SettableHiveCharObjectInspector.java new file mode 100644 index 0000000..a10929f --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/SettableHiveCharObjectInspector.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.hadoop.hive.common.type.HiveChar; + +public interface SettableHiveCharObjectInspector extends HiveCharObjectInspector { + Object set(Object o, HiveChar value); + + Object set(Object o, String value); + + Object create(HiveChar value); + +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantHiveCharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantHiveCharObjectInspector.java new file mode 100644 index 0000000..b8c3c38 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantHiveCharObjectInspector.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; + +/** + * A WritableConstantHiveCharObjectInspector is a WritableHiveCharObjectInspector + * that implements ConstantObjectInspector. + */ +public class WritableConstantHiveCharObjectInspector extends + WritableHiveCharObjectInspector implements + ConstantObjectInspector { + + protected HiveCharWritable value; + + // no-arg ctor required for Kryo serialization + WritableConstantHiveCharObjectInspector() { + } + + WritableConstantHiveCharObjectInspector(CharTypeInfo typeInfo, + HiveCharWritable value) { + super(typeInfo); + this.value = value; + } + + @Override + public HiveCharWritable getWritableConstantValue() { + return value; + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java new file mode 100644 index 0000000..c340dcb --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharUtils; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; + +public class WritableHiveCharObjectInspector extends AbstractPrimitiveWritableObjectInspector + implements SettableHiveCharObjectInspector { + // no-arg ctor required for Kryo serialization + public WritableHiveCharObjectInspector() { + } + + public WritableHiveCharObjectInspector(CharTypeInfo typeInfo) { + super(typeInfo); + } + + @Override + public HiveChar getPrimitiveJavaObject(Object o) { + // check input object's length, if it doesn't match + // then output a new primitive with the correct params. 
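Because doesWritableMatchTypeParams() only accepts values that fit the declared length, an over-long writable is converted rather than returned as-is. A hedged sketch (constructor visibility as declared in this patch; the sketch class name is invented):

import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;

public class WritableCharOISketch {
  public static void main(String[] args) {
    WritableHiveCharObjectInspector oi =
        new WritableHiveCharObjectInspector(new CharTypeInfo(3));
    HiveCharWritable w = new HiveCharWritable();
    w.set("abcdef", 6); // longer than the inspector's char(3)

    // Character length 6 > 3 fails doesWritableMatchTypeParams(), so a new,
    // truncated writable is returned; the input is left untouched.
    HiveCharWritable out = oi.getPrimitiveWritableObject(w);
    System.out.println(out); // "abc"
    System.out.println(w);   // still "abcdef"
  }
}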
+ if (o == null) { + return null; + } + HiveCharWritable writable = ((HiveCharWritable) o); + if (doesWritableMatchTypeParams(writable)) { + return writable.getHiveChar(); + } + return getPrimitiveWithParams(writable); + } + + @Override + public HiveCharWritable getPrimitiveWritableObject(Object o) { + // check input object's length, if it doesn't match + // then output new writable with correct params. + if (o == null) { + return null; + } + HiveCharWritable writable = ((HiveCharWritable) o); + if (doesWritableMatchTypeParams((HiveCharWritable) o)) { + return writable; + } + + return getWritableWithParams(writable); + } + + private HiveChar getPrimitiveWithParams(HiveCharWritable val) { + HiveChar hv = new HiveChar(); + hv.setValue(val.getHiveChar(), getMaxLength()); + return hv; + } + + private HiveCharWritable getWritableWithParams(HiveCharWritable val) { + HiveCharWritable newValue = new HiveCharWritable(); + newValue.set(val, getMaxLength()); + return newValue; + } + + private boolean doesWritableMatchTypeParams(HiveCharWritable writable) { + return BaseCharUtils.doesWritableMatchTypeParams( + writable, (CharTypeInfo)typeInfo); + } + + public Object copyObject(Object o) { + if (o == null) { + return null; + } + HiveCharWritable writable = (HiveCharWritable) o; + if (doesWritableMatchTypeParams((HiveCharWritable) o)) { + return new HiveCharWritable(writable); + } + return getWritableWithParams(writable); + } + + @Override + public Object set(Object o, HiveChar value) { + HiveCharWritable writable = (HiveCharWritable) o; + writable.set(value, getMaxLength()); + return o; + } + + @Override + public Object set(Object o, String value) { + HiveCharWritable writable = (HiveCharWritable) o; + writable.set(value, getMaxLength()); + return o; + } + + @Override + public Object create(HiveChar value) { + HiveCharWritable ret; + ret = new HiveCharWritable(); + ret.set(value, getMaxLength()); + return ret; + } + + public int getMaxLength() { + return ((CharTypeInfo)typeInfo).getLength(); + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java index 24d09ce..7bbfd1f 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java @@ -22,7 +22,7 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharUtils; +import org.apache.hadoop.hive.serde2.typeinfo.BaseCharUtils; public class WritableHiveVarcharObjectInspector extends AbstractPrimitiveWritableObjectInspector implements SettableHiveVarcharObjectInspector { @@ -78,7 +78,7 @@ private HiveVarcharWritable getWritableWithParams(HiveVarcharWritable val) { } private boolean doesWritableMatchTypeParams(HiveVarcharWritable writable) { - return VarcharUtils.doesWritableMatchTypeParams( + return BaseCharUtils.doesWritableMatchTypeParams( writable, (VarcharTypeInfo)typeInfo); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/BaseCharUtils.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/BaseCharUtils.java new file mode 100644 index 0000000..09e07c7 --- /dev/null +++ 
serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/BaseCharUtils.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.typeinfo; + +import org.apache.hadoop.hive.common.type.HiveBaseChar; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.io.HiveBaseCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; + +public class BaseCharUtils { + + public static void validateVarcharParameter(int length) { + if (length > HiveVarchar.MAX_VARCHAR_LENGTH || length < 1) { + throw new RuntimeException("Varchar length " + length + " out of allowed range [1, " + + HiveVarchar.MAX_VARCHAR_LENGTH + "]"); + } + } + + public static void validateCharParameter(int length) { + if (length > HiveChar.MAX_CHAR_LENGTH || length < 1) { + throw new RuntimeException("Char length " + length + " out of allowed range [1, " + + HiveChar.MAX_CHAR_LENGTH + "]"); + } + } + + public static boolean doesWritableMatchTypeParams(HiveBaseCharWritable writable, + BaseCharTypeInfo typeInfo) { + return typeInfo.getLength() >= writable.getCharacterLength(); + } + + public static boolean doesPrimitiveMatchTypeParams(HiveBaseChar value, + BaseCharTypeInfo typeInfo) { + return typeInfo.getLength() == value.getCharacterLength(); + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/CharTypeInfo.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/CharTypeInfo.java new file mode 100644 index 0000000..ad1837b --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/CharTypeInfo.java @@ -0,0 +1,46 @@ +package org.apache.hadoop.hive.serde2.typeinfo; + +import org.apache.hadoop.hive.serde.serdeConstants; + +public class CharTypeInfo extends BaseCharTypeInfo { + private static final long serialVersionUID = 1L; + + // no-arg constructor to make Kryo happy. + public CharTypeInfo() { + } + + public CharTypeInfo(int length) { + super(serdeConstants.CHAR_TYPE_NAME, length); + BaseCharUtils.validateCharParameter(length); + } + + @Override + public String getTypeName() { + return getQualifiedName(); + } + + @Override + public boolean equals(Object other) { + if (other == null || !(other instanceof CharTypeInfo)) { + return false; + } + + CharTypeInfo pti = (CharTypeInfo) other; + + return this.typeName.equals(pti.typeName) && this.getLength() == pti.getLength(); + } + + /** + * Generate the hashCode for this TypeInfo. 
+ */ + @Override + public int hashCode() { + return getQualifiedName().hashCode(); + } + + @Override + public String toString() { + return getQualifiedName(); + } + +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java index ca152a8..18027a4 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; @@ -132,6 +133,11 @@ private static PrimitiveTypeInfo createPrimitiveTypeInfo(String fullName) { } switch (typeEntry.primitiveCategory) { + case CHAR: + if (parts.typeParams.length != 1) { + return null; + } + return new CharTypeInfo(Integer.valueOf(parts.typeParams[0])); case VARCHAR: if (parts.typeParams.length != 1) { return null; @@ -148,6 +154,11 @@ private static PrimitiveTypeInfo createPrimitiveTypeInfo(String fullName) { } } + public static CharTypeInfo getCharTypeInfo(int length) { + String fullName = BaseCharTypeInfo.getQualifiedName(serdeConstants.CHAR_TYPE_NAME, length); + return (CharTypeInfo) getPrimitiveTypeInfo(fullName); + } + public static VarcharTypeInfo getVarcharTypeInfo(int length) { String fullName = BaseCharTypeInfo.getQualifiedName(serdeConstants.VARCHAR_TYPE_NAME, length); return (VarcharTypeInfo) getPrimitiveTypeInfo(fullName); diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java index 48aa52c..2982cf6 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java @@ -29,6 +29,7 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; @@ -397,17 +398,24 @@ private TypeInfo parseType() { String qualifiedTypeName = typeEntry.typeName; String[] params = parseParams(); switch (typeEntry.primitiveCategory) { + case CHAR: case VARCHAR: if (params == null || params.length == 0) { - throw new IllegalArgumentException( "Varchar type is specified without length: " + typeInfoString); + throw new IllegalArgumentException(typeEntry.typeName + + " type is specified without length: " + typeInfoString); } if (params.length == 1) { int length = Integer.valueOf(params[0]); - VarcharUtils.validateParameter(length); + if (typeEntry.primitiveCategory == PrimitiveCategory.VARCHAR) { + BaseCharUtils.validateVarcharParameter(length); + } else { + BaseCharUtils.validateCharParameter(length); + } qualifiedTypeName = BaseCharTypeInfo.getQualifiedName(typeEntry.typeName, length); } else if (params.length > 1) { - throw new IllegalArgumentException("Type varchar only takes one parameter, but " + + throw new IllegalArgumentException( + "Type " + typeEntry.typeName+ " 
diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
index 48aa52c..2982cf6 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
@@ -29,6 +29,7 @@
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
 
+import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
@@ -397,17 +398,24 @@ private TypeInfo parseType() {
       String qualifiedTypeName = typeEntry.typeName;
       String[] params = parseParams();
       switch (typeEntry.primitiveCategory) {
+      case CHAR:
       case VARCHAR:
         if (params == null || params.length == 0) {
-          throw new IllegalArgumentException( "Varchar type is specified without length: " + typeInfoString);
+          throw new IllegalArgumentException(typeEntry.typeName +
+              " type is specified without length: " + typeInfoString);
         }
 
         if (params.length == 1) {
           int length = Integer.valueOf(params[0]);
-          VarcharUtils.validateParameter(length);
+          if (typeEntry.primitiveCategory == PrimitiveCategory.VARCHAR) {
+            BaseCharUtils.validateVarcharParameter(length);
+          } else {
+            BaseCharUtils.validateCharParameter(length);
+          }
           qualifiedTypeName = BaseCharTypeInfo.getQualifiedName(typeEntry.typeName, length);
         } else if (params.length > 1) {
-          throw new IllegalArgumentException("Type varchar only takes one parameter, but " +
+          throw new IllegalArgumentException(
+              "Type " + typeEntry.typeName + " only takes one parameter, but " +
               params.length + " is seen");
         }
 
@@ -778,9 +786,10 @@ public static int getCharacterLengthForType(PrimitiveTypeInfo typeInfo) {
     switch (typeInfo.getPrimitiveCategory()) {
     case STRING:
       return HiveVarchar.MAX_VARCHAR_LENGTH;
+    case CHAR:
     case VARCHAR:
-      VarcharTypeInfo varcharTypeInfo = (VarcharTypeInfo) typeInfo;
-      return varcharTypeInfo.getLength();
+      BaseCharTypeInfo baseCharTypeInfo = (BaseCharTypeInfo) typeInfo;
+      return baseCharTypeInfo.getLength();
     default:
       return 0;
     }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeInfo.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeInfo.java
index 676a1e5..4550275 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeInfo.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeInfo.java
@@ -29,7 +29,7 @@ public VarcharTypeInfo() {
 
   public VarcharTypeInfo(int length) {
     super(serdeConstants.VARCHAR_TYPE_NAME, length);
-    VarcharUtils.validateParameter(length);
+    BaseCharUtils.validateVarcharParameter(length);
   }
 
   @Override
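The parser above delegates length checking to BaseCharUtils, which replaces the deleted VarcharUtils (below) but whose definition is not included in this excerpt. A hypothetical sketch of the two validators, assuming they keep the range-check style of VarcharUtils.validateParameter() with char lengths capped at HiveChar.MAX_CHAR_LENGTH:

    import org.apache.hadoop.hive.common.type.HiveChar;
    import org.apache.hadoop.hive.common.type.HiveVarchar;

    // Reconstruction for illustration only; the real BaseCharUtils is added
    // elsewhere in this patch and may differ in detail.
    public class BaseCharUtils {

      public static void validateVarcharParameter(int length) {
        // varchar(n) must satisfy 1 <= n <= MAX_VARCHAR_LENGTH
        if (length > HiveVarchar.MAX_VARCHAR_LENGTH || length < 1) {
          throw new RuntimeException("Varchar length " + length +
              " out of allowed range [1, " + HiveVarchar.MAX_VARCHAR_LENGTH + "]");
        }
      }

      public static void validateCharParameter(int length) {
        // char(n) must satisfy 1 <= n <= MAX_CHAR_LENGTH (255)
        if (length > HiveChar.MAX_CHAR_LENGTH || length < 1) {
          throw new RuntimeException("Char length " + length +
              " out of allowed range [1, " + HiveChar.MAX_CHAR_LENGTH + "]");
        }
      }
    }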
diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharUtils.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharUtils.java
deleted file mode 100644
index 0f721c5..0000000
--- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharUtils.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.serde2.typeinfo;
-
-import org.apache.hadoop.hive.common.type.HiveVarchar;
-import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
-
-public class VarcharUtils {
-
-  public static void validateParameter(int length) {
-    if (length > HiveVarchar.MAX_VARCHAR_LENGTH || length < 1) {
-      throw new RuntimeException("Varchar length " + length + " out of allowed range [1, " +
-          HiveVarchar.MAX_VARCHAR_LENGTH + "]");
-    }
-  }
-
-  public static boolean doesWritableMatchTypeParams(HiveVarcharWritable writable,
-      VarcharTypeInfo typeInfo) {
-    return typeInfo.getLength() >= writable.getCharacterLength();
-  }
-
-  public static boolean doesPrimitiveMatchTypeParams(HiveVarchar value,
-      VarcharTypeInfo typeInfo) {
-    return typeInfo.getLength() == value.getCharacterLength();
-  }
-
-}
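The tests that follow pin down the char semantics this patch builds on HiveChar: trailing spaces are stripped when a value is set and re-applied up to the declared length on output. A small standalone sketch against the HiveChar API added earlier in this patch; the printed values restate what the assertions below expect.

    import org.apache.hadoop.hive.common.type.HiveChar;

    public class HiveCharDemo {
      public static void main(String[] args) {
        // char(8): the stored value is stripped; toString() pads back out
        // to the declared length with trailing spaces.
        HiveChar hc = new HiveChar("abcd  ", 8);
        System.out.println("[" + hc.getStrippedValue() + "]"); // [abcd]
        System.out.println("[" + hc + "]");                    // [abcd    ]
        System.out.println(hc.getCharacterLength());           // 4
      }
    }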
diff --git serde/src/test/org/apache/hadoop/hive/serde2/io/TestHiveCharWritable.java serde/src/test/org/apache/hadoop/hive/serde2/io/TestHiveCharWritable.java
new file mode 100644
index 0000000..61eeb4c
--- /dev/null
+++ serde/src/test/org/apache/hadoop/hive/serde2/io/TestHiveCharWritable.java
@@ -0,0 +1,111 @@
+package org.apache.hadoop.hive.serde2.io;
+
+import junit.framework.TestCase;
+import org.apache.hadoop.hive.common.type.HiveChar;
+
+public class TestHiveCharWritable extends TestCase {
+  public void testConstructor() throws Exception {
+    HiveCharWritable hcw1 = new HiveCharWritable(new HiveChar("abc", 5));
+    assertEquals("abc  ", hcw1.toString());
+
+    HiveCharWritable hcw2 = new HiveCharWritable(hcw1);
+    assertEquals("abc  ", hcw2.toString());
+  }
+
+  public void testSet() throws Exception {
+    HiveCharWritable hcw1 = new HiveCharWritable();
+
+    HiveChar hc1 = new HiveChar("abcd", 8);
+    hcw1.set(hc1);
+    assertEquals("abcd    ", hcw1.toString());
+
+    hcw1.set(hc1, 10);
+    assertEquals("abcd      ", hcw1.toString());
+
+    hcw1.set(hc1, 2);
+    assertEquals("ab", hcw1.toString());
+
+    // copy whole value for strings, except trailing spaces
+    hcw1.set("abcd");
+    assertEquals("abcd", hcw1.toString());
+
+    hcw1.set("abcd ");
+    assertEquals("abcd", hcw1.toString());
+
+    hcw1.set("abcd", 10);
+    assertEquals("abcd      ", hcw1.toString());
+
+    hcw1.set("abcd", 2);
+    assertEquals("ab", hcw1.toString());
+
+    HiveCharWritable hcw2 = new HiveCharWritable(hc1);
+    hcw1.set(hcw2);
+    assertEquals("abcd    ", hcw1.toString());
+
+    hcw1.set(hcw2, 10);
+    assertEquals("abcd      ", hcw1.toString());
+
+    hcw1.set(hcw2, 2);
+    assertEquals("ab", hcw1.toString());
+  }
+
+  public void testGetHiveChar() throws Exception {
+    HiveCharWritable hcw = new HiveCharWritable();
+    hcw.set("abcd", 10);
+    assertEquals("abcd      ", hcw.getHiveChar().toString());
+  }
+
+  public void testGetCharacterLength() throws Exception {
+    HiveCharWritable hcw = new HiveCharWritable();
+    hcw.set("abcd", 10);
+    assertEquals(4, hcw.getCharacterLength());
+  }
+
+  public void testEnforceMaxLength() {
+    HiveCharWritable hcw1 = new HiveCharWritable();
+    hcw1.set("abcdefghij", 10);
+    assertEquals("abcdefghij", hcw1.toString());
+    hcw1.enforceMaxLength(12);
+    assertEquals("abcdefghij  ", hcw1.toString());
+    hcw1.enforceMaxLength(5);
+    assertEquals("abcde", hcw1.toString());
+  }
+
+  public void testComparison() throws Exception {
+    HiveCharWritable hcw1 = new HiveCharWritable();
+    HiveCharWritable hcw2 = new HiveCharWritable();
+
+    // same string
+    hcw1.set("abcd", 4);
+    hcw2.set("abcd", 4);
+    assertEquals(hcw1, hcw2);
+    assertEquals(hcw2, hcw1);
+    assertEquals(0, hcw1.compareTo(hcw2));
+    assertEquals(0, hcw2.compareTo(hcw1));
+
+    // unequal strings
+    hcw1.set("abcd", 4);
+    hcw2.set("abc", 4);
+    assertFalse(hcw1.equals(hcw2));
+    assertFalse(hcw2.equals(hcw1));
+    assertFalse(0 == hcw1.compareTo(hcw2));
+    assertFalse(0 == hcw2.compareTo(hcw1));
+
+    // trailing spaces are not significant
+    hcw1.set("abcd ", 10);
+    hcw2.set("abcd", 4);
+    assertEquals("abcd      ", hcw1.toString());
+    assertEquals(hcw1, hcw2);
+    assertEquals(hcw2, hcw1);
+    assertEquals(0, hcw1.compareTo(hcw2));
+    assertEquals(0, hcw2.compareTo(hcw1));
+
+    // leading spaces are significant
+    hcw1.set(" abcd", 5);
+    hcw2.set("abcd", 5);
+    assertFalse(hcw1.equals(hcw2));
+    assertFalse(hcw2.equals(hcw1));
+    assertFalse(0 == hcw1.compareTo(hcw2));
+    assertFalse(0 == hcw2.compareTo(hcw1));
+  }
+}
diff --git serde/src/test/org/apache/hadoop/hive/serde2/typeinfo/TestTypeInfoUtils.java serde/src/test/org/apache/hadoop/hive/serde2/typeinfo/TestTypeInfoUtils.java
index 798987c..d913d60 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/typeinfo/TestTypeInfoUtils.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/typeinfo/TestTypeInfoUtils.java
@@ -40,6 +40,7 @@ public void testTypeInfoParser() {
       "int",
       "string",
       "varchar(10)",
+      "char(15)",
       "array<int>"
   };
 
@@ -48,7 +49,11 @@ public void testTypeInfoParser() {
       "varchar(123",
       "varchar(123,",
       "varchar()",
-      "varchar("
+      "varchar(",
+      "char(123",
+      "char(123,)",
+      "char()",
+      "char("
   };
 
   for (String typeString : validTypeStrings) {
@@ -59,7 +64,7 @@
     }
   }
 
-  public void testVarcharNoParams() {
+  public void testQualifiedTypeNoParams() {
     boolean caughtException = false;
     try {
       TypeInfoUtils.getTypeInfoFromTypeString("varchar");
@@ -67,5 +72,13 @@
       caughtException = true;
     }
     assertEquals("varchar TypeInfo with no params should fail", true, caughtException);
+
+    caughtException = false;
+    try {
+      TypeInfoUtils.getTypeInfoFromTypeString("char");
+    } catch (Exception err) {
+      caughtException = true;
+    }
+    assertEquals("char TypeInfo with no params should fail", true, caughtException);
   }
 }
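Taken together, the parser tests above can be exercised end to end with a short sketch like the one below. The char(15) case follows directly from the patch; the char(0) rejection is an assumption that the validators keep a lower bound of 1, as the deleted VarcharUtils.validateParameter() did.

    import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

    public class ParseCharDemo {
      public static void main(String[] args) {
        // A well-formed char type parses to a CharTypeInfo carrying its length.
        CharTypeInfo ct =
            (CharTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString("char(15)");
        System.out.println(ct.getTypeName()); // char(15)
        System.out.println(ct.getLength());   // 15

        // Malformed or out-of-range lengths are rejected by the validators.
        try {
          TypeInfoUtils.getTypeInfoFromTypeString("char(0)");
        } catch (Exception expected) {
          System.out.println("rejected: " + expected.getMessage());
        }
      }
    }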