Index: common/src/java/org/apache/hadoop/hive/common/type/HiveBaseChar.java
===================================================================
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/type/HiveBaseChar.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+import org.apache.commons.lang.StringUtils;
+
+public abstract class HiveBaseChar {
+  protected String value;
+  protected int characterLength = -1;
+
+  protected HiveBaseChar() {
+  }
+
+  /**
+   * Sets the string value to a new value, obeying the max length defined for this object.
+   * @param val new value
+   */
+  public void setValue(String val, int maxLength) {
+    characterLength = -1;
+    value = HiveBaseChar.enforceMaxLength(val, maxLength);
+  }
+
+  public void setValue(HiveBaseChar val, int maxLength) {
+    if ((maxLength < 0)
+        || (val.characterLength > 0 && val.characterLength <= maxLength)) {
+      // No length enforcement required, or the source length is within the max length.
+      // We can copy the source value as-is.
+      value = val.value;
+      this.characterLength = val.characterLength;
+    } else {
+      setValue(val.value, maxLength);
+    }
+  }
+
+  public static String enforceMaxLength(String val, int maxLength) {
+    String value = val;
+
+    if (maxLength > 0) {
+      int valLength = val.codePointCount(0, val.length());
+      if (valLength > maxLength) {
+        // Truncate the excess characters to fit the max character length.
+        // Also make sure we take supplementary chars into account.
+        value = val.substring(0, val.offsetByCodePoints(0, maxLength));
+      }
+    }
+    return value;
+  }
+
+  public String getValue() {
+    return value;
+  }
+
+  public int getCharacterLength() {
+    if (characterLength < 0) {
+      characterLength = value.codePointCount(0, value.length());
+    }
+    return characterLength;
+  }
+}
Index: common/src/java/org/apache/hadoop/hive/common/type/HiveVarchar.java
===================================================================
--- /dev/null
+++ common/src/java/org/apache/hadoop/hive/common/type/HiveVarchar.java
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.type;
+
+/**
+ *
+ * HiveVarchar.
+ * String wrapper to support SQL VARCHAR features.
+ * Max string length is enforced.
+ *
+ */
+public class HiveVarchar extends HiveBaseChar
+  implements Comparable<HiveVarchar> {
+
+  public static final int MAX_VARCHAR_LENGTH = Integer.MAX_VALUE;
+
+  public HiveVarchar() {
+  }
+
+  public HiveVarchar(String val, int len) {
+    setValue(val, len);
+  }
+
+  public HiveVarchar(HiveVarchar hc, int len) {
+    setValue(hc, len);
+  }
+
+  /**
+   * Set the new value
+   */
+  public void setValue(String val) {
+    super.setValue(val, -1);
+  }
+
+  public void setValue(HiveVarchar hc) {
+    super.setValue(hc.getValue(), -1);
+  }
+
+  @Override
+  public String toString() {
+    return getValue();
+  }
+
+  public int compareTo(HiveVarchar rhs) {
+    if (rhs == this) {
+      return 0;
+    }
+    return this.getValue().compareTo(rhs.getValue());
+  }
+
+  public boolean equals(HiveVarchar rhs) {
+    if (rhs == this) {
+      return true;
+    }
+    return this.getValue().equals(rhs.getValue());
+  }
+
+  public int hashCode() {
+    return getValue().hashCode();
+  }
+}
Index: common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java
===================================================================
--- /dev/null
+++ common/src/test/org/apache/hadoop/hive/common/type/TestHiveVarchar.java
@@ -0,0 +1,143 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.hadoop.hive.common.type; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.LogUtils; +import org.apache.hadoop.hive.common.LogUtils.LogInitializationException; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.util.Random; + + +public class TestHiveVarchar extends TestCase { + public TestHiveVarchar() { + super(); + } + + static Random rnd = new Random(); + + public static int getRandomSupplementaryChar() { + int lowSurrogate = 0xDC00 + rnd.nextInt(1024); + //return 0xD8000000 + lowSurrogate; + int highSurrogate = 0xD800; + return Character.toCodePoint((char)highSurrogate, (char)lowSurrogate); + } + + public static int getRandomCodePoint() { + int codePoint; + if (rnd.nextDouble() < 0.50) { + codePoint = 32 + rnd.nextInt(90); + } else { + codePoint = getRandomSupplementaryChar(); + } + if (!Character.isValidCodePoint(codePoint)) { + System.out.println(Integer.toHexString(codePoint) + " is not a valid code point"); + } + return codePoint; + } + + public static int getRandomCodePoint(int excludeChar) { + while (true) { + int codePoint = getRandomCodePoint(); + if (codePoint != excludeChar) { + return codePoint; + } + } + } + + public void testStringLength() throws Exception { + int strLen = 20; + int[] lengths = { 15, 20, 25 }; + // Try with supplementary characters + for (int idx1 = 0; idx1 < lengths.length; ++idx1) { + // Create random test string + StringBuffer sb = new StringBuffer(); + int curLen = lengths[idx1]; + for (int idx2 = 0; idx2 < curLen; ++idx2) { + sb.appendCodePoint(getRandomCodePoint(' ')); + } + String testString = sb.toString(); + assertEquals(curLen, testString.codePointCount(0, testString.length())); + String enforcedString = HiveBaseChar.enforceMaxLength(testString, strLen); + if (curLen <= strLen) { + // No truncation needed + assertEquals(testString, enforcedString); + } else { + // String should have been truncated. + assertEquals(strLen, enforcedString.codePointCount(0, enforcedString.length())); + } + } + + // Try with ascii chars + String[] testStrings = { + "abcdefg", + "abcdefghijklmnopqrst", + "abcdefghijklmnopqrstuvwxyz" + }; + for (String testString : testStrings) { + int curLen = testString.length(); + assertEquals(curLen, testString.codePointCount(0, testString.length())); + String enforcedString = HiveBaseChar.enforceMaxLength(testString, strLen); + if (curLen <= strLen) { + // No truncation needed + assertEquals(testString, enforcedString); + } else { + // String should have been truncated. 
+ assertEquals(strLen, enforcedString.codePointCount(0, enforcedString.length())); + } + } + } + + public void testComparison() throws Exception { + HiveVarchar hc1 = new HiveVarchar("abcd", 20); + HiveVarchar hc2 = new HiveVarchar("abcd", 20); + + // Identical strings should be equal + assertTrue(hc1.equals(hc2)); + assertTrue(hc2.equals(hc1)); + assertEquals(0, hc1.compareTo(hc2)); + assertEquals(0, hc2.compareTo(hc1)); + + // Unequal strings + hc2 = new HiveVarchar("abcde", 20); + assertFalse(hc1.equals(hc2)); + assertFalse(hc2.equals(hc1)); + assertFalse(0 == hc1.compareTo(hc2)); + assertFalse(0 == hc2.compareTo(hc1)); + + // Trailing spaces are significant + hc2 = new HiveVarchar("abcd ", 30); + + assertFalse(hc1.equals(hc2)); + assertFalse(hc2.equals(hc1)); + assertFalse(0 == hc1.compareTo(hc2)); + assertFalse(0 == hc2.compareTo(hc1)); + + // Leading spaces are significant + hc2 = new HiveVarchar(" abcd", 20); + assertFalse(hc1.equals(hc2)); + assertFalse(hc2.equals(hc1)); + assertFalse(0 == hc1.compareTo(hc2)); + assertFalse(0 == hc2.compareTo(hc1)); + } +} Index: contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesRecordReader.java =================================================================== --- contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesRecordReader.java +++ contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesRecordReader.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -90,8 +91,9 @@ for (String columnType : columnTypes) { PrimitiveTypeEntry dstTypeEntry = PrimitiveObjectInspectorUtils .getTypeEntryFromTypeName(columnType); - dstOIns.add(PrimitiveObjectInspectorFactory - .getPrimitiveWritableObjectInspector(dstTypeEntry.primitiveCategory)); + dstOIns.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + dstTypeEntry.primitiveCategory, + ParameterizedPrimitiveTypeUtils.getTypeParamsFromPrimitiveTypeEntry(dstTypeEntry))); } } @@ -154,8 +156,9 @@ PrimitiveTypeEntry srcTypeEntry = PrimitiveObjectInspectorUtils .getTypeEntryFromTypeName(typeName); srcOIns - .add(PrimitiveObjectInspectorFactory - .getPrimitiveWritableObjectInspector(srcTypeEntry.primitiveCategory)); + .add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + srcTypeEntry.primitiveCategory, + ParameterizedPrimitiveTypeUtils.getTypeParamsFromPrimitiveTypeEntry(srcTypeEntry))); converters.add(ObjectInspectorConverters.getConverter(srcOIns.get(pos), dstOIns.get(pos))); } else { Index: data/files/vc1.txt =================================================================== --- /dev/null +++ data/files/vc1.txt @@ -0,0 +1,3 @@ +1abc +2abc +3 abc Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -56,6 +56,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import 
org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.InputFormat; @@ -130,7 +131,9 @@ List inspectors = new ArrayList(vcCols.size()); for (VirtualColumn vc : vcCols) { inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - vc.getTypeInfo().getPrimitiveCategory())); + vc.getTypeInfo().getPrimitiveCategory(), + ParameterizedPrimitiveTypeUtils.getTypeParamsFromTypeInfo( + vc.getTypeInfo()))); names.add(vc.getName()); } vcsOI = ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -23,6 +23,7 @@ import java.net.URL; import java.util.ArrayList; import java.util.Collections; +import java.util.EnumMap; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -39,6 +40,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; @@ -51,7 +53,6 @@ import org.apache.hadoop.hive.ql.udf.UDFAtan; import org.apache.hadoop.hive.ql.udf.UDFBin; import org.apache.hadoop.hive.ql.udf.UDFCeil; -import org.apache.hadoop.hive.ql.udf.UDFConcat; import org.apache.hadoop.hive.ql.udf.UDFConv; import org.apache.hadoop.hive.ql.udf.UDFCos; import org.apache.hadoop.hive.ql.udf.UDFDate; @@ -75,7 +76,6 @@ import org.apache.hadoop.hive.ql.udf.UDFLog; import org.apache.hadoop.hive.ql.udf.UDFLog10; import org.apache.hadoop.hive.ql.udf.UDFLog2; -import org.apache.hadoop.hive.ql.udf.UDFLower; import org.apache.hadoop.hive.ql.udf.UDFLpad; import org.apache.hadoop.hive.ql.udf.UDFMinute; import org.apache.hadoop.hive.ql.udf.UDFMonth; @@ -123,7 +123,6 @@ import org.apache.hadoop.hive.ql.udf.UDFTrim; import org.apache.hadoop.hive.ql.udf.UDFType; import org.apache.hadoop.hive.ql.udf.UDFUnhex; -import org.apache.hadoop.hive.ql.udf.UDFUpper; import org.apache.hadoop.hive.ql.udf.UDFWeekOfYear; import org.apache.hadoop.hive.ql.udf.UDFYear; import org.apache.hadoop.hive.ql.udf.generic.*; @@ -145,11 +144,17 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.w3c.dom.Document;
@@ -190,7 +195,7 @@
   public static final HashSet<String> UDAFS_IMPLY_ORDER = new HashSet<String>();
 
   static {
-    registerUDF("concat", UDFConcat.class, false);
+    registerGenericUDF("concat", GenericUDFConcat.class);
     registerUDF("substr", UDFSubstr.class, false);
     registerUDF("substring", UDFSubstr.class, false);
     registerUDF("space", UDFSpace.class, false);
@@ -234,10 +239,10 @@
     registerUDF("hex", UDFHex.class, false);
     registerUDF("unhex", UDFUnhex.class, false);
 
-    registerUDF("upper", UDFUpper.class, false);
-    registerUDF("lower", UDFLower.class, false);
-    registerUDF("ucase", UDFUpper.class, false);
-    registerUDF("lcase", UDFLower.class, false);
+    registerGenericUDF("upper", GenericUDFUpper.class);
+    registerGenericUDF("lower", GenericUDFLower.class);
+    registerGenericUDF("ucase", GenericUDFUpper.class);
+    registerGenericUDF("lcase", GenericUDFLower.class);
     registerUDF("trim", UDFTrim.class, false);
     registerUDF("ltrim", UDFLTrim.class, false);
     registerUDF("rtrim", UDFRTrim.class, false);
@@ -351,6 +356,8 @@
         GenericUDFToBinary.class);
     registerGenericUDF(serdeConstants.DECIMAL_TYPE_NAME,
         GenericUDFToDecimal.class);
+    registerGenericUDF(serdeConstants.VARCHAR_TYPE_NAME,
+        GenericUDFToVarchar.class);
 
     // Aggregate functions
     registerGenericUDAF("max", new GenericUDAFMax());
@@ -601,24 +608,73 @@
     return synonyms;
   }
 
-  static Map<TypeInfo, Integer> numericTypes = new HashMap<TypeInfo, Integer>();
-  static List<TypeInfo> numericTypeList = new ArrayList<TypeInfo>();
-
-  static void registerNumericType(String typeName, int level) {
-    TypeInfo t = TypeInfoFactory.getPrimitiveTypeInfo(typeName);
-    numericTypeList.add(t);
-    numericTypes.put(t, level);
+  static EnumMap<PrimitiveCategory, Integer> numericTypes =
+      new EnumMap<PrimitiveCategory, Integer>(PrimitiveCategory.class);
+  static List<PrimitiveCategory> numericTypeList = new ArrayList<PrimitiveCategory>();
+
+  static void registerNumericType(PrimitiveCategory primitiveCategory, int level) {
+    numericTypeList.add(primitiveCategory);
+    numericTypes.put(primitiveCategory, level);
   }
 
   static {
-    registerNumericType(serdeConstants.TINYINT_TYPE_NAME, 1);
-    registerNumericType(serdeConstants.SMALLINT_TYPE_NAME, 2);
-    registerNumericType(serdeConstants.INT_TYPE_NAME, 3);
-    registerNumericType(serdeConstants.BIGINT_TYPE_NAME, 4);
-    registerNumericType(serdeConstants.FLOAT_TYPE_NAME, 5);
-    registerNumericType(serdeConstants.DOUBLE_TYPE_NAME, 6);
-    registerNumericType(serdeConstants.DECIMAL_TYPE_NAME, 7);
-    registerNumericType(serdeConstants.STRING_TYPE_NAME, 8);
+    registerNumericType(PrimitiveCategory.BYTE, 1);
+    registerNumericType(PrimitiveCategory.SHORT, 2);
+    registerNumericType(PrimitiveCategory.INT, 3);
+    registerNumericType(PrimitiveCategory.LONG, 4);
+    registerNumericType(PrimitiveCategory.FLOAT, 5);
+    registerNumericType(PrimitiveCategory.DOUBLE, 6);
+    registerNumericType(PrimitiveCategory.DECIMAL, 7);
+    registerNumericType(PrimitiveCategory.STRING, 8);
+  }
+
+  static int getCharacterLengthForType(PrimitiveTypeInfo typeInfo) {
+    switch (typeInfo.getPrimitiveCategory()) {
+    case STRING:
+      return HiveVarchar.MAX_VARCHAR_LENGTH;
+    case VARCHAR:
+      VarcharTypeParams varcharParams =
+          (VarcharTypeParams) ((ParameterizedPrimitiveTypeInfo)typeInfo).getParameters();
+      if (varcharParams == null) {
+        throw new RuntimeException("varchar type used without type params");
+      }
+      return varcharParams.getLength();
+    default:
+      return 0;
+    }
+  }
+
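For illustration, here is a minimal self-contained sketch of the precedence rule encoded by the registerNumericType() calls above (the class and enum below are hypothetical stand-ins for PrimitiveCategory, not part of the patch): the common type of two registered categories is simply the one with the higher level.

import java.util.EnumMap;

public class PrecedenceSketch {
  enum Cat { BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, DECIMAL, STRING }

  static final EnumMap<Cat, Integer> LEVELS = new EnumMap<Cat, Integer>(Cat.class);
  static {
    // Mirrors the registerNumericType() calls above: higher level wins.
    LEVELS.put(Cat.BYTE, 1);
    LEVELS.put(Cat.SHORT, 2);
    LEVELS.put(Cat.INT, 3);
    LEVELS.put(Cat.LONG, 4);
    LEVELS.put(Cat.FLOAT, 5);
    LEVELS.put(Cat.DOUBLE, 6);
    LEVELS.put(Cat.DECIMAL, 7);
    LEVELS.put(Cat.STRING, 8);
  }

  static Cat commonClass(Cat a, Cat b) {
    return LEVELS.get(a) > LEVELS.get(b) ? a : b;
  }

  public static void main(String[] args) {
    System.out.println(commonClass(Cat.INT, Cat.FLOAT)); // FLOAT
    System.out.println(commonClass(Cat.LONG, Cat.INT));  // LONG
  }
}

Under this rule INT paired with FLOAT resolves to FLOAT, and any numeric type paired with STRING resolves to STRING (level 8), matching the numericTypes lookups in the getCommonClass methods that follow.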
+ /** + * Given 2 TypeInfo types and the PrimitiveCategory selected as the common class between the two, + * return a TypeInfo corresponding to the common PrimitiveCategory, and with type qualifiers + * (if applicable) that match the 2 TypeInfo types. + * Examples: + * varchar(10), varchar(20), primitive category varchar => varchar(20) + * date, string, primitive category string => string + * @param a TypeInfo of the first type + * @param b TypeInfo of the second type + * @param typeCategory PrimitiveCategory of the designated common type between a and b + * @return TypeInfo represented by the primitive category, with any applicable type qualifiers. + */ + public static TypeInfo getTypeInfoForPrimitiveCategory( + PrimitiveTypeInfo a, PrimitiveTypeInfo b, PrimitiveCategory typeCategory) { + // For types with parameters (like varchar), we need to determine the type parameters + // that should be added to this type, based on the original 2 TypeInfos. + switch (typeCategory) { + case VARCHAR: + int maxLength = Math.max(getCharacterLengthForType(a), getCharacterLengthForType(b)); + VarcharTypeParams varcharParams = new VarcharTypeParams(); + varcharParams.setLength(maxLength); + String typeName = + PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveCategory(typeCategory).typeName + + varcharParams.toString(); + return TypeInfoFactory.getPrimitiveTypeInfo(typeName); + + default: + // Type doesn't require any qualifiers. + return TypeInfoFactory.getPrimitiveTypeInfo( + PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveCategory(typeCategory).typeName); + } } /** @@ -628,18 +684,38 @@ if (a.equals(b)) { return a; } + if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) { + return null; + } + PrimitiveCategory pcA = ((PrimitiveTypeInfo)a).getPrimitiveCategory(); + PrimitiveCategory pcB = ((PrimitiveTypeInfo)b).getPrimitiveCategory(); + + if (pcA == pcB) { + // Same primitive category but different qualifiers. 
+ return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcA); + } + + PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA); + PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB); + // handle string types properly + if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.STRING_GROUP) { + return getTypeInfoForPrimitiveCategory( + (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b,PrimitiveCategory.STRING); + } + if (FunctionRegistry.implicitConvertable(a, b)) { - return b; + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcB); } if (FunctionRegistry.implicitConvertable(b, a)) { - return a; + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcA); } - for (TypeInfo t : numericTypeList) { - if (FunctionRegistry.implicitConvertable(a, t) - && FunctionRegistry.implicitConvertable(b, t)) { - return t; + for (PrimitiveCategory t : numericTypeList) { + if (FunctionRegistry.implicitConvertable(pcA, t) + && FunctionRegistry.implicitConvertable(pcB, t)) { + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, t); } } + return null; } @@ -657,12 +733,34 @@ if (a.equals(b)) { return a; } - for (TypeInfo t : numericTypeList) { - if (FunctionRegistry.implicitConvertable(a, t) - && FunctionRegistry.implicitConvertable(b, t)) { - return t; + if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) { + return null; + } + PrimitiveCategory pcA = ((PrimitiveTypeInfo)a).getPrimitiveCategory(); + PrimitiveCategory pcB = ((PrimitiveTypeInfo)b).getPrimitiveCategory(); + + if (pcA == pcB) { + // Same primitive category but different qualifiers. + // Rely on getTypeInfoForPrimitiveCategory() to sort out the type params. + return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcA); + } + + PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA); + PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB); + // handle string types properly + if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.STRING_GROUP) { + // Compare as strings. Char comparison semantics may be different if/when implemented. 
+      return getTypeInfoForPrimitiveCategory(
+          (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, PrimitiveCategory.STRING);
+    }
+
+    for (PrimitiveCategory t : numericTypeList) {
+      if (FunctionRegistry.implicitConvertable(pcA, t)
+          && FunctionRegistry.implicitConvertable(pcB, t)) {
+        return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, t);
       }
     }
+
     return null;
   }
@@ -678,45 +776,67 @@
     if (a.equals(b)) {
       return a;
     }
-    Integer ai = numericTypes.get(a);
-    Integer bi = numericTypes.get(b);
+    if (a.getCategory() != Category.PRIMITIVE || b.getCategory() != Category.PRIMITIVE) {
+      return null;
+    }
+    PrimitiveCategory pcA = ((PrimitiveTypeInfo)a).getPrimitiveCategory();
+    PrimitiveCategory pcB = ((PrimitiveTypeInfo)b).getPrimitiveCategory();
+
+    PrimitiveGrouping pgA = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcA);
+    PrimitiveGrouping pgB = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(pcB);
+    // handle string types properly
+    if (pgA == PrimitiveGrouping.STRING_GROUP && pgB == PrimitiveGrouping.STRING_GROUP) {
+      return getTypeInfoForPrimitiveCategory(
+          (PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, PrimitiveCategory.STRING);
+    }
+
+    Integer ai = numericTypes.get(pcA);
+    Integer bi = numericTypes.get(pcB);
     if (ai == null || bi == null) {
       // If either is not a numeric type, return null.
       return null;
     }
-    return (ai > bi) ? a : b;
+    PrimitiveCategory pcCommon = (ai > bi) ? pcA : pcB;
+    return getTypeInfoForPrimitiveCategory((PrimitiveTypeInfo)a, (PrimitiveTypeInfo)b, pcCommon);
   }
 
-  /**
-   * Returns whether it is possible to implicitly convert an object of Class
-   * from to Class to.
-   */
-  public static boolean implicitConvertable(TypeInfo from, TypeInfo to) {
-    if (from.equals(to)) {
+  public static boolean isConversionRequiredForComparison(TypeInfo typeA, TypeInfo typeB) {
+    if (typeA == typeB) {
+      return false;
+    }
+    if (TypeInfoUtils.doPrimitiveCategoriesMatch(typeA, typeB)) {
+      return false;
+    }
+    return true;
+  }
+
+  public static boolean implicitConvertable(PrimitiveCategory from, PrimitiveCategory to) {
+    if (from == to) {
       return true;
     }
+
+    PrimitiveGrouping fromPg = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(from);
+    PrimitiveGrouping toPg = PrimitiveObjectInspectorUtils.getPrimitiveGrouping(to);
+
     // Allow implicit String to Double conversion
-    if (from.equals(TypeInfoFactory.stringTypeInfo)
-        && to.equals(TypeInfoFactory.doubleTypeInfo)) {
+    if (fromPg == PrimitiveGrouping.STRING_GROUP && to == PrimitiveCategory.DOUBLE) {
       return true;
     }
     // Allow implicit String to Decimal conversion
-    if (from.equals(TypeInfoFactory.stringTypeInfo)
-        && to.equals(TypeInfoFactory.decimalTypeInfo)) {
+    if (fromPg == PrimitiveGrouping.STRING_GROUP && to == PrimitiveCategory.DECIMAL) {
       return true;
     }
     // Void can be converted to any type
-    if (from.equals(TypeInfoFactory.voidTypeInfo)) {
+    if (from == PrimitiveCategory.VOID) {
       return true;
     }
     // Allow implicit Date group to String group conversion
-    if (from.equals(TypeInfoFactory.dateTypeInfo)
-        && to.equals(TypeInfoFactory.stringTypeInfo)) {
+    if (fromPg == PrimitiveGrouping.DATE_GROUP && toPg == PrimitiveGrouping.STRING_GROUP) {
       return true;
     }
-    if (from.equals(TypeInfoFactory.timestampTypeInfo)
-        && to.equals(TypeInfoFactory.stringTypeInfo)) {
+    // Allow implicit String to varchar conversion, and vice versa
+    if (fromPg == PrimitiveGrouping.STRING_GROUP && toPg == PrimitiveGrouping.STRING_GROUP) {
       return true;
     }
@@ -734,6 +854,26 @@
   }
 
+  /**
+   * Returns whether it is possible to implicitly convert an object of Class
+   * from to Class to.
+ */ + public static boolean implicitConvertable(TypeInfo from, TypeInfo to) { + if (from.equals(to)) { + return true; + } + + // Reimplemented to use PrimitiveCategory rather than TypeInfo, because + // 2 TypeInfos from the same qualified type (varchar, decimal) should still be + // seen as equivalent. + if (from.getCategory() == Category.PRIMITIVE && to.getCategory() == Category.PRIMITIVE) { + return implicitConvertable( + ((PrimitiveTypeInfo)from).getPrimitiveCategory(), + ((PrimitiveTypeInfo)to).getPrimitiveCategory()); + } + return false; + } + + /** * Get the GenericUDAF evaluator for the name and argumentClasses. * * @param name @@ -911,7 +1051,8 @@ */ public static int matchCost(TypeInfo argumentPassed, TypeInfo argumentAccepted, boolean exact) { - if (argumentAccepted.equals(argumentPassed)) { + if (argumentAccepted.equals(argumentPassed) + || TypeInfoUtils.doPrimitiveCategoriesMatch(argumentPassed, argumentAccepted)) { // matches return 0; } @@ -1054,9 +1195,15 @@ for (TypeInfo accepted: argumentsAccepted) { TypeInfo reference = referenceIterator.next(); - if (numericTypes.containsKey(accepted)) { + boolean acceptedIsPrimitive = false; + PrimitiveCategory acceptedPrimCat = PrimitiveCategory.UNKNOWN; + if (accepted.getCategory() == Category.PRIMITIVE) { + acceptedIsPrimitive = true; + acceptedPrimCat = ((PrimitiveTypeInfo) accepted).getPrimitiveCategory(); + } + if (acceptedIsPrimitive && numericTypes.containsKey(acceptedPrimCat)) { // We're looking for the udf with the smallest maximum numeric type. - int typeValue = numericTypes.get(accepted); + int typeValue = numericTypes.get(acceptedPrimCat); maxNumericType = typeValue > maxNumericType ? typeValue : maxNumericType; } else if (!accepted.equals(reference)) { // There are non-numeric arguments that don't match from one UDF to @@ -1118,8 +1265,20 @@ bridge.getColNames(), bridge.getColTypes()); } - return (GenericUDF) ReflectionUtils + GenericUDF newInstance = (GenericUDF) ReflectionUtils .newInstance(genericUDF.getClass(), null); + // The original may have settable info that needs to be added to the new copy. 
+ if (genericUDF instanceof SettableUDF) { + try { + Object settableData = ((SettableUDF)genericUDF).getParams(); + if (settableData != null) { + ((SettableUDF)newInstance).setParams(settableData); + } + } catch (UDFArgumentException err) { + LOG.error("Unable to add settable data to UDF " + genericUDF.getClass()); + } + } + return newInstance; } /** @@ -1274,6 +1433,7 @@ udfClass == UDFToDouble.class || udfClass == UDFToFloat.class || udfClass == UDFToInteger.class || udfClass == UDFToLong.class || udfClass == UDFToShort.class || udfClass == UDFToString.class || + udfClass == GenericUDFToVarchar.class || udfClass == GenericUDFTimestamp.class || udfClass == GenericUDFToBinary.class || udfClass == GenericUDFToDate.class; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -52,6 +52,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; @@ -361,7 +362,9 @@ VirtualColumn vc = vcs.get(i); vcsObjectInspectors.add( PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - ((PrimitiveTypeInfo) vc.getTypeInfo()).getPrimitiveCategory())); + ((PrimitiveTypeInfo) vc.getTypeInfo()).getPrimitiveCategory(), + ParameterizedPrimitiveTypeUtils.getTypeParamsFromTypeInfo( + (PrimitiveTypeInfo) vc.getTypeInfo()))); vcNames.add(vc.getName()); } StructObjectInspector vcStructObjectInspector = ObjectInspectorFactory Index: ql/src/java/org/apache/hadoop/hive/ql/exec/SettableUDF.java =================================================================== --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SettableUDF.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec; + +/** + * Interface to allow passing of parameters to the UDF, before it is initialized. + * For example, to be able to pass the char length parameters to a char type cast. + */ +public interface SettableUDF { + + /** + * Add data to UDF prior to initialization. + * An exception may be thrown if the UDF doesn't know what to do with this data. 
+   * @param params UDF-specific data to add to the UDF
+   */
+  void setParams(Object params) throws UDFArgumentException;
+
+  Object getParams();
+
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java
@@ -629,7 +629,7 @@
     case HiveParser.TOK_UNIONTYPE:
       return getUnionTypeStringFromAST(typeNode);
     default:
-      return DDLSemanticAnalyzer.getTypeName(typeNode.getType());
+      return DDLSemanticAnalyzer.getTypeName(typeNode);
     }
   }
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -38,6 +38,7 @@
 import org.antlr.runtime.tree.CommonTree;
 import org.antlr.runtime.tree.Tree;
+import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
@@ -148,19 +149,62 @@
     TokenToTypeName.put(HiveParser.TOK_FLOAT, serdeConstants.FLOAT_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_DOUBLE, serdeConstants.DOUBLE_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_STRING, serdeConstants.STRING_TYPE_NAME);
+    TokenToTypeName.put(HiveParser.TOK_VARCHAR, serdeConstants.VARCHAR_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_BINARY, serdeConstants.BINARY_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_DATE, serdeConstants.DATE_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_DATETIME, serdeConstants.DATETIME_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_TIMESTAMP, serdeConstants.TIMESTAMP_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_DECIMAL, serdeConstants.DECIMAL_TYPE_NAME);
   }
 
-  public static String getTypeName(int token) throws SemanticException {
+  public static String addParamsToTypeName(String typeName, List<String> params) {
+    StringBuilder sb = new StringBuilder();
+    sb.append(typeName);
+    sb.append("(");
+    sb.append(StringUtils.join(params, ","));
+    sb.append(")");
+    return sb.toString();
+  }
+
+  public static List<String> getCharParams(String typeName, ASTNode node)
+      throws SemanticException {
+    if (node.getChildCount() != 1) {
+      throw new SemanticException("Bad params for type " + typeName);
+    }
+    try {
+      String lengthStr = node.getChild(0).getText();
+      Integer length = Integer.valueOf(lengthStr);
+      if (length.intValue() < 1) {
+        throw new SemanticException(typeName + " length must be a positive number");
+      }
+      List<String> paramList = new LinkedList<String>();
+      paramList.add(length.toString());
+      return paramList;
+    } catch (NumberFormatException err) {
+      throw new SemanticException("Bad params for type " + typeName);
+    }
+  }
+
+  public static String getTypeName(ASTNode node) throws SemanticException {
+    int token = node.getType();
+    String typeName;
+
     // datetime type isn't currently supported
     if (token == HiveParser.TOK_DATETIME) {
       throw new SemanticException(ErrorMsg.UNSUPPORTED_TYPE.getMsg());
     }
-    return TokenToTypeName.get(token);
+
+    switch (token) {
+    case HiveParser.TOK_VARCHAR:
+      typeName = TokenToTypeName.get(token);
+      // Need to validate params here
+      List<String> paramList = getCharParams(typeName, node);
+      typeName = addParamsToTypeName(typeName, paramList);
+      break;
+    default:
+      typeName = TokenToTypeName.get(token);
+    }
+    return typeName;
   }
 
   static class TablePartition {
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -105,6 +105,7 @@ KW_TIMESTAMP: 'TIMESTAMP'; KW_DECIMAL: 'DECIMAL'; KW_STRING: 'STRING'; +KW_VARCHAR: 'VARCHAR'; KW_ARRAY: 'ARRAY'; KW_STRUCT: 'STRUCT'; KW_MAP: 'MAP'; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -110,6 +110,7 @@ TOK_DATETIME; TOK_TIMESTAMP; TOK_STRING; +TOK_VARCHAR; TOK_BINARY; TOK_DECIMAL; TOK_LIST; @@ -1771,6 +1772,7 @@ | KW_STRING -> TOK_STRING | KW_BINARY -> TOK_BINARY | KW_DECIMAL -> TOK_DECIMAL + | KW_VARCHAR LPAREN length=Number RPAREN -> ^(TOK_VARCHAR $length) ; listType Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -171,6 +171,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -5304,6 +5307,44 @@ } /** + * @param column column expression to convert + * @param tableFieldTypeInfo TypeInfo to convert to + * @return Expression converting column to the type specified by tableFieldTypeInfo + */ + ExprNodeDesc createConversionCast(ExprNodeDesc column, PrimitiveTypeInfo tableFieldTypeInfo) + throws SemanticException { + ExprNodeDesc ret; + + // Get base type, since type string may be parameterized + String baseType = TypeInfoUtils.getBaseName(tableFieldTypeInfo.getTypeName()); + BaseTypeParams typeParams = null; + // If TypeInfo is parameterized, provide the params to the UDF factory method. + if (tableFieldTypeInfo instanceof ParameterizedPrimitiveTypeInfo) { + typeParams = + ((ParameterizedPrimitiveTypeInfo)tableFieldTypeInfo).getParameters(); + if (typeParams != null) { + switch (tableFieldTypeInfo.getPrimitiveCategory()) { + case VARCHAR: + // Nothing to do here - the parameter will be passed to the UDF factory method below + break; + default: + throw new SemanticException("Type cast for " + tableFieldTypeInfo.getPrimitiveCategory() + + " does not take type parameters"); + } + } + } + + // If the type cast UDF is for a parameterized type, then it should implement + // the SettableUDF interface so that we can pass in the params. + // Not sure if this is the cleanest solution, but there does need to be a way + // to provide the type params to the type cast. + ret = TypeCheckProcFactory.DefaultExprProcessor + .getFuncExprNodeDescWithUdfData(baseType, typeParams, column); + + return ret; + } + + /** * Generate the conversion SelectOperator that converts the columns into the * types that are expected by the table_desc. 
*/ @@ -5374,9 +5415,7 @@ // cannot convert to complex types column = null; } else { - column = TypeCheckProcFactory.DefaultExprProcessor - .getFuncExprNodeDesc(tableFieldTypeInfo.getTypeName(), - column); + column = createConversionCast(column, (PrimitiveTypeInfo)tableFieldTypeInfo); } if (column == null) { String reason = "Cannot convert column " + i + " from " @@ -5612,9 +5651,7 @@ // cannot convert to complex types column = null; } else { - column = TypeCheckProcFactory.DefaultExprProcessor - .getFuncExprNodeDesc(tableFieldTypeInfo.getTypeName(), - column); + column = createConversionCast(column, (PrimitiveTypeInfo)tableFieldTypeInfo); } if (column == null) { String reason = "Cannot convert column " + posn + " from " @@ -6191,7 +6228,9 @@ } // Add implicit type conversion if necessary for (int i = 0; i < right.length; i++) { - if (!commonType.equals(keys.get(i).get(k).getTypeInfo())) { + if (FunctionRegistry.isConversionRequiredForComparison( + keys.get(i).get(k).getTypeInfo(), + commonType)) { keys.get(i).set( k, TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc( @@ -7526,12 +7565,12 @@ * @param unionalias * The alias of the union. * @return - * @throws UDFArgumentException + * @throws SemanticException */ private Operator genInputSelectForUnion( Operator origInputOp, Map origInputFieldMap, String origInputAlias, RowResolver unionoutRR, String unionalias) - throws UDFArgumentException { + throws SemanticException { List columns = new ArrayList(); boolean needsCast = false; @@ -7542,8 +7581,7 @@ lInfo.getTabAlias(), lInfo.getIsVirtualCol(), lInfo.isSkewedCol()); if (!lInfo.getType().equals(unionEntry.getValue().getType())) { needsCast = true; - column = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc( - unionEntry.getValue().getType().getTypeName(), column); + column = createConversionCast(column, (PrimitiveTypeInfo)unionEntry.getValue().getType()); } columns.add(column); } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.ColumnInfo; import org.apache.hadoop.hive.ql.exec.FunctionInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.SettableUDF; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; @@ -61,6 +62,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -564,6 +566,8 @@ serdeConstants.DOUBLE_TYPE_NAME); conversionFunctionTextHashMap.put(HiveParser.TOK_STRING, serdeConstants.STRING_TYPE_NAME); + conversionFunctionTextHashMap.put(HiveParser.TOK_VARCHAR, + serdeConstants.VARCHAR_TYPE_NAME); conversionFunctionTextHashMap.put(HiveParser.TOK_BINARY, serdeConstants.BINARY_TYPE_NAME); conversionFunctionTextHashMap.put(HiveParser.TOK_DATE, @@ -646,7 +650,7 @@ * * @throws 
UDFArgumentException
   */
-  public static ExprNodeDesc getFuncExprNodeDesc(String udfName,
+  public static ExprNodeDesc getFuncExprNodeDescWithUdfData(String udfName, Object udfData,
       ExprNodeDesc... children) throws UDFArgumentException {
 
     FunctionInfo fi = FunctionRegistry.getFunctionInfo(udfName);
@@ -660,11 +664,23 @@
           + " is an aggregation function or a table function.");
     }
 
+    // Add udfData to UDF if necessary
+    if (udfData != null) {
+      if (genericUDF instanceof SettableUDF) {
+        ((SettableUDF)genericUDF).setParams(udfData);
+      }
+    }
+
     List<ExprNodeDesc> childrenList = new ArrayList<ExprNodeDesc>(children.length);
     childrenList.addAll(Arrays.asList(children));
     return ExprNodeGenericFuncDesc.newInstance(genericUDF, childrenList);
   }
 
+  public static ExprNodeDesc getFuncExprNodeDesc(String udfName,
+      ExprNodeDesc... children) throws UDFArgumentException {
+    return getFuncExprNodeDescWithUdfData(udfName, null, children);
+  }
+
   static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr,
       boolean isFunction, ArrayList<ExprNodeDesc> children, TypeCheckCtx ctx)
       throws SemanticException, UDFArgumentException {
@@ -761,11 +777,32 @@
       }
     }
 
+    // getGenericUDF() actually clones the UDF. Just call it once and reuse.
+    GenericUDF genericUDF = fi.getGenericUDF();
+
     if (!fi.isNative()) {
       ctx.getUnparseTranslator().addIdentifierTranslation(
           (ASTNode) expr.getChild(0));
     }
 
+    // Handle type casts that may contain type parameters
+    if (isFunction) {
+      ASTNode funcNameNode = (ASTNode)expr.getChild(0);
+      switch (funcNameNode.getType()) {
+      case HiveParser.TOK_VARCHAR:
+        // Add type params
+        VarcharTypeParams varcharTypeParams = new VarcharTypeParams();
+        varcharTypeParams.length = Integer.valueOf(funcNameNode.getChild(0).getText());
+        if (genericUDF != null) {
+          ((SettableUDF)genericUDF).setParams(varcharTypeParams);
+        }
+        break;
+      default:
+        // Do nothing
+        break;
+      }
+    }
+
     // Detect UDTF's in nested SELECT, GROUP BY, etc as they aren't
     // supported
     if (fi.getGenericUDTF() != null) {
@@ -780,16 +817,16 @@
         throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg(expr));
       }
     }
-    if (!ctx.getAllowStatefulFunctions() && (fi.getGenericUDF() != null)) {
-      if (FunctionRegistry.isStateful(fi.getGenericUDF())) {
+    if (!ctx.getAllowStatefulFunctions() && (genericUDF != null)) {
+      if (FunctionRegistry.isStateful(genericUDF)) {
         throw new SemanticException(
             ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg());
       }
     }
 
     // Try to infer the type of the constant only if there are two
     // nodes, one of them is column and the other is numeric const
-    if (fi.getGenericUDF() instanceof GenericUDFBaseCompare
+    if (genericUDF instanceof GenericUDFBaseCompare
         && children.size() == 2
         && ((children.get(0) instanceof ExprNodeConstantDesc
             && children.get(1) instanceof ExprNodeColumnDesc)
@@ -846,7 +883,7 @@
           // however, if we already tried this, or the column is NUMBER type and
           // the operator is EQUAL, return false due to the type mismatch
           if (triedDouble ||
-              (fi.getGenericUDF() instanceof GenericUDFOPEqual
+              (genericUDF instanceof GenericUDFOPEqual
               && !columnType.equals(serdeConstants.STRING_TYPE_NAME))) {
             return new ExprNodeConstantDesc(false);
           }
@@ -864,7 +901,7 @@
       }
     }
 
-    desc = ExprNodeGenericFuncDesc.newInstance(fi.getGenericUDF(), children);
+    desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, children);
   }
 
   // UDFOPPositive is a no-op.
// However, we still create it, and then remove it here, to make sure we Index: ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java @@ -20,6 +20,7 @@ import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -143,6 +144,15 @@ } } + public Text evaluate(HiveVarcharWritable i) { + if (i == null) { + return null; + } + // TODO: Should converted string be space-padded, or stripped? + t.set(i.toString()); + return t; + } + public Text evaluate(TimestampWritable i) { if (i == null) { return null; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBaseCompare.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.udf.generic; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; @@ -29,15 +30,18 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; import org.apache.hadoop.io.BooleanWritable; /** @@ -133,36 +137,20 @@ TypeInfo oiTypeInfo0 = TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[0]); TypeInfo oiTypeInfo1 = TypeInfoUtils.getTypeInfoFromObjectInspector(arguments[1]); - if (oiTypeInfo0 != oiTypeInfo1) { + if (oiTypeInfo0 == oiTypeInfo1 + || TypeInfoUtils.doPrimitiveCategoriesMatch(oiTypeInfo0, oiTypeInfo1)) { + compareType = CompareType.SAME_TYPE; + } else { compareType = CompareType.NEED_CONVERT; + TypeInfo compareType = FunctionRegistry.getCommonClassForComparison(oiTypeInfo0, oiTypeInfo1); - if ((oiTypeInfo0.equals(TypeInfoFactory.stringTypeInfo) - && oiTypeInfo1.equals(TypeInfoFactory.dateTypeInfo)) - || (oiTypeInfo0.equals(TypeInfoFactory.dateTypeInfo) - && 
oiTypeInfo1.equals(TypeInfoFactory.stringTypeInfo))) { - // Date should be comparable with string - compareOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( - TypeInfoFactory.stringTypeInfo); - - } else if (oiTypeInfo0.equals(TypeInfoFactory.stringTypeInfo) - || oiTypeInfo1.equals(TypeInfoFactory.stringTypeInfo)) { - // If either argument is a string, we convert to a double because a number - // in string form should always be convertible into a double - compareOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( - TypeInfoFactory.doubleTypeInfo); - } else { - TypeInfo compareType = FunctionRegistry.getCommonClass(oiTypeInfo0, oiTypeInfo1); - - // For now, we always convert to double if we can't find a common type - compareOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( - (compareType == null) ? - TypeInfoFactory.doubleTypeInfo : compareType); - } + // For now, we always convert to double if we can't find a common type + compareOI = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + (compareType == null) ? + TypeInfoFactory.doubleTypeInfo : compareType); converter0 = ObjectInspectorConverters.getConverter(arguments[0], compareOI); converter1 = ObjectInspectorConverters.getConverter(arguments[1], compareOI); - } else { - compareType = CompareType.SAME_TYPE; } } return PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java =================================================================== --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFConcat.java @@ -0,0 +1,193 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ParameterizedObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; +import org.apache.hadoop.io.BytesWritable; + +/** + * GenericUDFConcat. + */ +@Description(name = "concat", +value = "_FUNC_(str1, str2, ... strN) - returns the concatenation of str1, str2, ... strN or "+ + "_FUNC_(bin1, bin2, ... binN) - returns the concatenation of bytes in binary data " + + " bin1, bin2, ... binN", +extended = "Returns NULL if any argument is NULL.\n" ++ "Example:\n" ++ " > SELECT _FUNC_('abc', 'def') FROM src LIMIT 1;\n" ++ " 'abcdef'") +public class GenericUDFConcat extends GenericUDF { + private ObjectInspector[] argumentOIs; + private StringConverter[] stringConverters; + private PrimitiveCategory returnType = PrimitiveCategory.STRING; + private BytesWritable[] bw; + private GenericUDFUtils.StringHelper returnHelper; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + + // Loop through all the inputs to determine the appropriate return type/length. + // Either all arguments are binary, or all columns are non-binary. 
+ // Return type: + // All VARCHAR inputs: return VARCHAR + // All BINARY inputs: return BINARY + // Otherwise return STRING + argumentOIs = arguments; + + PrimitiveCategory currentCategory; + PrimitiveObjectInspector poi; + boolean fixedLengthReturnValue = true; + int returnLength = 0; // Only for char/varchar return types + for (int idx = 0; idx < arguments.length; ++idx) { + if (arguments[idx].getCategory() != Category.PRIMITIVE) { + throw new UDFArgumentException("CONCAT only takes primitive arguments"); + } + poi = (PrimitiveObjectInspector)arguments[idx]; + currentCategory = poi.getPrimitiveCategory(); + if (idx == 0) { + returnType = currentCategory; + } + switch (currentCategory) { + case BINARY: + fixedLengthReturnValue = false; + if (returnType != currentCategory) { + throw new UDFArgumentException( + "CONCAT cannot take a mix of binary and non-binary arguments"); + } + break; + case VARCHAR: + if (returnType == PrimitiveCategory.BINARY) { + throw new UDFArgumentException( + "CONCAT cannot take a mix of binary and non-binary arguments"); + } + break; + default: + if (returnType == PrimitiveCategory.BINARY) { + throw new UDFArgumentException( + "CONCAT cannot take a mix of binary and non-binary arguments"); + } + returnType = PrimitiveCategory.STRING; + fixedLengthReturnValue = false; + break; + } + if (fixedLengthReturnValue) { + returnLength += GenericUDFUtils.StringHelper.getFixedStringSizeForType(poi); + } + } + + if (returnType == PrimitiveCategory.BINARY) { + bw = new BytesWritable[arguments.length]; + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; + } else { + // treat all inputs as string, the return value will be converted to the appropriate type. + createStringConverters(); + returnHelper = new GenericUDFUtils.StringHelper(returnType); + switch (returnType) { + case STRING: + return PrimitiveObjectInspectorFactory.writableStringObjectInspector; + case VARCHAR: + VarcharTypeParams varcharParams = new VarcharTypeParams(); + varcharParams.setLength(returnLength); + return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + returnType, varcharParams); + default: + throw new UDFArgumentException("Unexpected CONCAT return type of " + returnType); + } + } + } + + private void createStringConverters() { + stringConverters = new StringConverter[argumentOIs.length]; + for (int idx = 0; idx < argumentOIs.length; ++idx) { + stringConverters[idx] = new StringConverter((PrimitiveObjectInspector) argumentOIs[idx]); + } + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + if (returnType == PrimitiveCategory.BINARY) { + return binaryEvaluate(arguments); + } else { + return returnHelper.setReturnValue(stringEvaluate(arguments)); + } + } + + public Object binaryEvaluate(DeferredObject[] arguments) throws HiveException { + int len = 0; + for (int idx = 0; idx < arguments.length; ++idx) { + bw[idx] = ((BinaryObjectInspector)argumentOIs[idx]) + .getPrimitiveWritableObject(arguments[idx].get()); + if (bw[idx] == null){ + return null; + } + len += bw[idx].getLength(); + } + + byte[] out = new byte[len]; + int curLen = 0; + // Need to iterate twice since BytesWritable doesn't support append. 
+ for (BytesWritable bytes : bw){ + System.arraycopy(bytes.getBytes(), 0, out, curLen, bytes.getLength()); + curLen += bytes.getLength(); + } + return new BytesWritable(out); + } + + public String stringEvaluate(DeferredObject[] arguments) throws HiveException { + StringBuilder sb = new StringBuilder(); + for (int idx = 0; idx < arguments.length; ++idx) { + String val = null; + if (arguments[idx] != null) { + val = (String) stringConverters[idx].convert(arguments[idx].get()); + } + if (val == null) { + return null; + } + sb.append(val); + } + return sb.toString(); + } + + @Override + public String getDisplayString(String[] children) { + StringBuilder sb = new StringBuilder(); + sb.append("CONCAT("); + if (children.length > 0) { + sb.append(children[0]); + for (int i = 1; i < children.length; i++) { + sb.append(","); + sb.append(children[i]); + } + } + sb.append(")"); + return sb.toString(); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java =================================================================== --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFLower.java @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; + +/** + * UDFLower. 
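+ * Varchar-aware lower(): a varchar(n) argument produces a varchar(n) result,
+ * while any other primitive argument is converted and returned as string.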
+ * + */ +@Description(name = "lower,lcase", +value = "_FUNC_(str) - Returns str with all characters changed to lowercase", +extended = "Example:\n" ++ " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + " 'facebook'") +public class GenericUDFLower extends GenericUDF { + private PrimitiveObjectInspector argumentOI; + private StringConverter stringConverter; + private PrimitiveCategory returnType = PrimitiveCategory.STRING; + private GenericUDFUtils.StringHelper returnHelper; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException( + "LOWER requires 1 argument, got " + arguments.length); + } + + if (arguments[0].getCategory() != Category.PRIMITIVE) { + throw new UDFArgumentException( + "LOWER only takes primitive types, got " + arguments[0].getTypeName()); + } + argumentOI = (PrimitiveObjectInspector) arguments[0]; + + stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI); + PrimitiveCategory inputType = argumentOI.getPrimitiveCategory(); + ObjectInspector outputOI = null; + switch (inputType) { + case VARCHAR: + // return type should have same length as the input. + returnType = inputType; + VarcharTypeParams varcharParams = new VarcharTypeParams(); + varcharParams.setLength(GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI)); + outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + returnType, varcharParams); + break; + default: + returnType = PrimitiveCategory.STRING; + outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + break; + } + returnHelper = new GenericUDFUtils.StringHelper(returnType); + return outputOI; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + String val = null; + if (arguments[0] != null) { + val = (String) stringConverter.convert(arguments[0].get()); + } + if (val == null) { + return null; + } + val = val.toLowerCase(); + return returnHelper.setReturnValue(val); + } + + @Override + public String getDisplayString(String[] children) { + StringBuilder sb = new StringBuilder(); + sb.append("lower("); + if (children.length > 0) { + sb.append(children[0]); + for (int i = 1; i < children.length; i++) { + sb.append(","); + sb.append(children[i]); + } + } + sb.append(")"); + return sb.toString(); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFReflect2.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; @@ -93,8 +94,12 @@ try { method = findMethod(targetClass, methodName.toString(), null, true); + // While getTypeFor() returns a TypeEntry, we won't actually be able to get any + // type parameter information from this since the TypeEntry is derived from
a return type. + PrimitiveTypeEntry typeEntry = getTypeFor(method.getReturnType()); returnOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - getTypeFor(method.getReturnType()).primitiveCategory); + typeEntry.primitiveCategory, + typeEntry.typeParams); returnObj = (Writable) returnOI.getPrimitiveWritableClass().newInstance(); } catch (Exception e) { throw new UDFArgumentException(e); Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java =================================================================== --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToVarchar.java @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf.generic; + +import java.io.Serializable; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.SettableUDF; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.HiveVarcharConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; + + +public class GenericUDFToVarchar extends GenericUDF + implements SettableUDF, Serializable { + private static final Log LOG = LogFactory.getLog(GenericUDFToVarchar.class.getName()); + private PrimitiveObjectInspector argumentOI; + private HiveVarcharConverter converter; + private VarcharTypeParams typeParams; + + public GenericUDFToVarchar() { + } + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentException("VARCHAR cast requires a value argument"); + } + try { + argumentOI = (PrimitiveObjectInspector) arguments[0]; + } catch (ClassCastException e) { + throw new UDFArgumentException( + "The function VARCHAR takes only primitive types"); + } + + // Check if this UDF has been provided with type params for the output varchar type + SettableHiveVarcharObjectInspector outputOI; + if (typeParams != null) { + outputOI = (SettableHiveVarcharObjectInspector) + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + 
PrimitiveCategory.VARCHAR, typeParams); + } else { + outputOI = PrimitiveObjectInspectorFactory.writableHiveVarcharObjectInspector; + } + + converter = new HiveVarcharConverter(argumentOI, outputOI); + return outputOI; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + Object o0 = arguments[0].get(); + if (o0 == null) { + return null; + } + + return converter.convert(o0); + } + + @Override + public String getDisplayString(String[] children) { + assert (children.length == 1); + StringBuilder sb = new StringBuilder(); + sb.append("CAST( "); + sb.append(children[0]); + sb.append(" AS VARCHAR("); + String paramsStr = ""; + if (typeParams != null) { + paramsStr = typeParams.toString(); + } + sb.append(paramsStr); + sb.append("))"); + return sb.toString(); + } + + /** + * Provide varchar type parameters for the output object inspector. + * This should be done before the UDF is initialized. + */ + @Override + public void setParams(Object typeParams) throws UDFArgumentException { + if (converter != null) { + LOG.warn("Type converter already initialized, setting type params now will not be useful"); + } + if (typeParams instanceof VarcharTypeParams) { + this.typeParams = (VarcharTypeParams)typeParams; + } else { + throw new UDFArgumentException( + "Was expecting VarcharTypeParams, instead got " + typeParams.getClass().getName()); + } + } + + @Override + public Object getParams() { + return typeParams; + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUpper.java =================================================================== --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUpper.java @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.StringConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; + +/** + * UDFUpper.
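+ * Varchar-aware upper(): a varchar(n) argument produces a varchar(n) result,
+ * while any other primitive argument is converted and returned as string.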
+ * + */ +@Description(name = "upper,ucase", + value = "_FUNC_(str) - Returns str with all characters changed to uppercase", + extended = "Example:\n" + + " > SELECT _FUNC_('Facebook') FROM src LIMIT 1;\n" + " 'FACEBOOK'") +public class GenericUDFUpper extends GenericUDF { + private PrimitiveObjectInspector argumentOI; + private StringConverter stringConverter; + private PrimitiveCategory returnType = PrimitiveCategory.STRING; + private GenericUDFUtils.StringHelper returnHelper; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException( + "UPPER requires 1 argument, got " + arguments.length); + } + + if (arguments[0].getCategory() != Category.PRIMITIVE) { + throw new UDFArgumentException( + "UPPER only takes primitive types, got " + arguments[0].getTypeName()); + } + argumentOI = (PrimitiveObjectInspector) arguments[0]; + + stringConverter = new PrimitiveObjectInspectorConverter.StringConverter(argumentOI); + PrimitiveCategory inputType = argumentOI.getPrimitiveCategory(); + ObjectInspector outputOI = null; + switch (inputType) { + case VARCHAR: + // return type should have same length as the input. + returnType = inputType; + VarcharTypeParams varcharParams = new VarcharTypeParams(); + varcharParams.setLength(GenericUDFUtils.StringHelper.getFixedStringSizeForType(argumentOI)); + outputOI = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + returnType, varcharParams); + break; + default: + returnType = PrimitiveCategory.STRING; + outputOI = PrimitiveObjectInspectorFactory.writableStringObjectInspector; + break; + } + returnHelper = new GenericUDFUtils.StringHelper(returnType); + return outputOI; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + String val = null; + if (arguments[0] != null) { + val = (String) stringConverter.convert(arguments[0].get()); + } + if (val == null) { + return null; + } + val = val.toUpperCase(); + return returnHelper.setReturnValue(val); + } + + @Override + public String getDisplayString(String[] children) { + StringBuilder sb = new StringBuilder(); + sb.append("upper("); + if (children.length > 0) { + sb.append(children[0]); + for (int i = 1; i < children.length; i++) { + sb.append(","); + sb.append(children[i]); + } + } + sb.append(")"); + return sb.toString(); + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java @@ -29,17 +29,22 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.IdentityConverter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ParameterizedObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; import org.apache.hadoop.io.Text; /** @@ -350,6 +355,72 @@ }; /** + * Helper class for UDFs returning string/varchar/char + */ + public static class StringHelper { + + protected Object returnValue; + protected PrimitiveCategory type; + + public StringHelper(PrimitiveCategory type) throws UDFArgumentException { + this.type = type; + switch (type) { + case STRING: + returnValue = new Text(); + break; + case VARCHAR: + returnValue = new HiveVarcharWritable(); + break; + default: + throw new UDFArgumentException("Unexpected non-string type " + type); + } + } + + public Object setReturnValue(String val) throws UDFArgumentException { + if (val == null) { + return null; + } + switch (type) { + case STRING: + ((Text)returnValue).set(val); + return returnValue; + case VARCHAR: + ((HiveVarcharWritable)returnValue).set(val); + return returnValue; + default: + throw new UDFArgumentException("Bad return type " + type); + } + } + + /** + * Helper function to help GenericUDFs determine the return type + * character length for char/varchar. + * @param poi PrimitiveObjectInspector representing the type + * @return character length of the type + * @throws UDFArgumentException + */ + public static int getFixedStringSizeForType(PrimitiveObjectInspector poi) + throws UDFArgumentException { + // TODO: we can support date, int, .. any types which would have a fixed length value + switch (poi.getPrimitiveCategory()) { + case VARCHAR: + VarcharTypeParams varcharParams = null; + if (poi instanceof ParameterizedObjectInspector) { + varcharParams = + (VarcharTypeParams) ((ParameterizedObjectInspector)poi).getTypeParams(); + } + if (varcharParams == null || varcharParams.length < 0) { + throw new UDFArgumentException("varchar type used without type params"); + } + return varcharParams.length; + default: + throw new UDFArgumentException("No fixed size for type " + poi.getTypeName()); + } + } + + } + + /** * Return an ordinal from an integer. 
*/ public static String getOrdinal(int i) { Index: ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/TestFunctionRegistry.java @@ -24,10 +24,20 @@ import junit.framework.TestCase; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.BytesWritable; @@ -38,16 +48,25 @@ public class TestUDF { public void same(DoubleWritable x, DoubleWritable y) {} public void same(HiveDecimalWritable x, HiveDecimalWritable y) {} + public void same(Text x, Text y) {} public void one(IntWritable x, HiveDecimalWritable y) {} public void one(IntWritable x, DoubleWritable y) {} public void one(IntWritable x, IntWritable y) {} public void mismatch(DateWritable x, HiveDecimalWritable y) {} public void mismatch(TimestampWritable x, HiveDecimalWritable y) {} public void mismatch(BytesWritable x, DoubleWritable y) {} } + TypeInfo varchar5; + TypeInfo varchar10; + TypeInfo maxVarchar; + @Override protected void setUp() { + String maxVarcharTypeName = "varchar(" + HiveVarchar.MAX_VARCHAR_LENGTH + ")"; + maxVarchar = TypeInfoFactory.getPrimitiveTypeInfo(maxVarcharTypeName); + varchar10 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)"); + varchar5 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(5)"); } private void implicit(TypeInfo a, TypeInfo b, boolean convertible) { @@ -61,6 +80,19 @@ implicit(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.decimalTypeInfo, true); implicit(TypeInfoFactory.dateTypeInfo, TypeInfoFactory.decimalTypeInfo, false); implicit(TypeInfoFactory.timestampTypeInfo, TypeInfoFactory.decimalTypeInfo, false); + implicit(TypeInfoFactory.varcharTypeInfo, TypeInfoFactory.stringTypeInfo, true); + implicit(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.varcharTypeInfo, true); + + // Try with parameterized varchar types + TypeInfo varchar10 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)"); + TypeInfo varchar20 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(20)"); + + implicit(varchar10, TypeInfoFactory.stringTypeInfo, true); + implicit(varchar20, TypeInfoFactory.stringTypeInfo, true); + implicit(TypeInfoFactory.stringTypeInfo, varchar10, true); + implicit(TypeInfoFactory.stringTypeInfo, varchar20, true); + implicit(varchar20, varchar10, true); +// implicit(TypeInfoFactory.intTypeInfo, varchar10, true); } private void verify(Class udf, String name, TypeInfo ta, TypeInfo tb, @@ -114,7 +146,7 @@ } private void common(TypeInfo a, TypeInfo b, TypeInfo result) { - 
assertEquals(FunctionRegistry.getCommonClass(a,b), result); + assertEquals(result, FunctionRegistry.getCommonClass(a,b)); } public void testCommonClass() { @@ -126,10 +158,13 @@ TypeInfoFactory.decimalTypeInfo); common(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); + + common(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo); + common(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); } private void comparison(TypeInfo a, TypeInfo b, TypeInfo result) { - assertEquals(FunctionRegistry.getCommonClassForComparison(a,b), result); + assertEquals(result, FunctionRegistry.getCommonClassForComparison(a,b)); } public void testCommonClassComparison() { @@ -141,6 +176,95 @@ TypeInfoFactory.decimalTypeInfo); comparison(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.doubleTypeInfo); + + comparison(TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo); + comparison(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.dateTypeInfo, + TypeInfoFactory.stringTypeInfo); + + comparison(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo); + comparison(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); + comparison(varchar5, varchar10, varchar10); + } + + /** + * Method to print out the comparison/conversion behavior for data types. + */ + public void testPrintTypeCompatibility() { + if (true) { + return; + } + + String[] typeStrings = { + "void", "boolean", "tinyint", "smallint", "int", "bigint", "float", "double", + "string", "timestamp", "date", "binary", "decimal", "varchar(10)", "varchar(5)", + }; + for (String cat1 : typeStrings) { + TypeInfo ti1 = null; + try { + ti1 = TypeInfoUtils.getTypeInfoFromTypeString(cat1); + } catch (Exception err) { + System.out.println(err); + System.out.println("Unable to get TypeInfo for " + cat1 + ", skipping ..."); + continue; + } + + for (String cat2 : typeStrings) { + TypeInfo commonClass = null; + boolean implicitConvertable = false; + try { + TypeInfo ti2 = TypeInfoUtils.getTypeInfoFromTypeString(cat2); + try { + commonClass = FunctionRegistry.getCommonClassForComparison(ti1, ti2); + //implicitConvertable = FunctionRegistry.implicitConvertable(ti1, ti2); + } catch (Exception err) { + System.out.println("Failed to get common class for " + ti1 + ", " + ti2 + ": " + err); + err.printStackTrace(); + //System.out.println("Unable to get TypeInfo for " + cat2 + ", skipping ..."); + } + System.out.println(cat1 + " - " + cat2 + ": " + commonClass); + //System.out.println(cat1 + " - " + cat2 + ": " + implicitConvertable); + } catch (Exception err) { + System.out.println(err); + System.out.println("Unable to get TypeInfo for " + cat2 + ", skipping ..."); + continue; + } + } + } + } + + private void unionAll(TypeInfo a, TypeInfo b, TypeInfo result) { + assertEquals(result, FunctionRegistry.getCommonClassForUnionAll(a,b)); + } + + public void testCommonClassUnionAll() { + unionAll(TypeInfoFactory.intTypeInfo, TypeInfoFactory.decimalTypeInfo, + TypeInfoFactory.decimalTypeInfo); + unionAll(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.decimalTypeInfo, + TypeInfoFactory.decimalTypeInfo); + unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.decimalTypeInfo, + TypeInfoFactory.decimalTypeInfo); + unionAll(TypeInfoFactory.doubleTypeInfo, TypeInfoFactory.stringTypeInfo, + TypeInfoFactory.stringTypeInfo); + + unionAll(varchar5, varchar10, varchar10); + 
unionAll(varchar10, varchar5, varchar10); + unionAll(varchar10, TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); + unionAll(TypeInfoFactory.stringTypeInfo, varchar10, TypeInfoFactory.stringTypeInfo); + } + + public void testGetTypeInfoForPrimitiveCategory() { + // varchar should take string length into account. + // varchar(5), varchar(10) => varchar(10) + assertEquals(varchar10, FunctionRegistry.getTypeInfoForPrimitiveCategory( + (PrimitiveTypeInfo) varchar5, (PrimitiveTypeInfo) varchar10, PrimitiveCategory.VARCHAR)); + assertEquals(varchar10, FunctionRegistry.getTypeInfoForPrimitiveCategory( + (PrimitiveTypeInfo) varchar10, (PrimitiveTypeInfo) varchar5, PrimitiveCategory.VARCHAR)); + + // non-qualified types should simply return the TypeInfo associated with that type + assertEquals(TypeInfoFactory.stringTypeInfo, FunctionRegistry.getTypeInfoForPrimitiveCategory( + (PrimitiveTypeInfo) varchar10, (PrimitiveTypeInfo) TypeInfoFactory.stringTypeInfo, + PrimitiveCategory.STRING)); } @Override Index: ql/src/test/queries/clientpositive/varchar_1.q =================================================================== --- /dev/null +++ ql/src/test/queries/clientpositive/varchar_1.q @@ -0,0 +1,32 @@ +drop table varchar1; +drop table varchar1_1; + +create table varchar1 (key varchar(10), value varchar(20)); +create table varchar1_1 (key string, value string); + +-- load from file +load data local inpath '../data/files/srcbucket0.txt' overwrite into table varchar1; +select * from varchar1 limit 2; + +-- insert overwrite, from same/different length varchar +insert overwrite table varchar1 + select cast(key as varchar(10)), cast(value as varchar(15)) from src limit 2; +select key, value from varchar1; + +-- insert overwrite, from string +insert overwrite table varchar1 + select key, value from src limit 2; +select key, value from varchar1; + +-- insert string from varchar +insert overwrite table varchar1_1 + select key, value from varchar1 limit 2; +select key, value from varchar1_1; + +-- respect string length +insert overwrite table varchar1 + select key, cast(value as varchar(3)) from src limit 2; +select key, value from varchar1; + +drop table varchar1; +drop table varchar1_1; Index: ql/src/test/queries/clientpositive/varchar_2.q =================================================================== --- /dev/null +++ ql/src/test/queries/clientpositive/varchar_2.q @@ -0,0 +1,36 @@ +drop table varchar_2; + +create table varchar_2 ( + key varchar(10), + value varchar(20) +); + +insert overwrite table varchar_2 select * from src; + +select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5; + +-- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from varchar_2 +group by value +order by value asc +limit 5; + +select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5; + +-- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from varchar_2 +group by value +order by value desc +limit 5; + +drop table varchar_2; Index: ql/src/test/queries/clientpositive/varchar_comparison.q =================================================================== --- /dev/null +++ ql/src/test/queries/clientpositive/varchar_comparison.q @@ -0,0 +1,40 @@ + +-- Should all be true +select + cast('abc' as varchar(10)) = cast('abc' as varchar(10)), + cast('abc' as varchar(10)) <= cast('abc' as varchar(10)), + cast('abc' as varchar(10)) >= cast('abc' 
as varchar(10)), + cast('abc' as varchar(10)) < cast('abd' as varchar(10)), + cast('abc' as varchar(10)) > cast('abb' as varchar(10)), + cast('abc' as varchar(10)) <> cast('abb' as varchar(10)) +from src limit 1; + +-- Different varchar lengths should still compare the same +select + cast('abc' as varchar(10)) = cast('abc' as varchar(3)), + cast('abc' as varchar(10)) <= cast('abc' as varchar(3)), + cast('abc' as varchar(10)) >= cast('abc' as varchar(3)), + cast('abc' as varchar(10)) < cast('abd' as varchar(3)), + cast('abc' as varchar(10)) > cast('abb' as varchar(3)), + cast('abc' as varchar(10)) <> cast('abb' as varchar(3)) +from src limit 1; + +-- Should work with string types as well +select + cast('abc' as varchar(10)) = 'abc', + cast('abc' as varchar(10)) <= 'abc', + cast('abc' as varchar(10)) >= 'abc', + cast('abc' as varchar(10)) < 'abd', + cast('abc' as varchar(10)) > 'abb', + cast('abc' as varchar(10)) <> 'abb' +from src limit 1; + +-- leading space is significant for varchar +select + cast(' abc' as varchar(10)) <> cast('abc' as varchar(10)) +from src limit 1; + +-- trailing space is significant for varchar +select + cast('abc ' as varchar(10)) <> cast('abc' as varchar(10)) +from src limit 1; Index: ql/src/test/queries/clientpositive/varchar_join1.q =================================================================== --- /dev/null +++ ql/src/test/queries/clientpositive/varchar_join1.q @@ -0,0 +1,35 @@ +drop table varchar_join1_vc1; +drop table varchar_join1_vc2; +drop table varchar_join1_str; + +create table varchar_join1_vc1 ( + c1 int, + c2 varchar(10) +); + +create table varchar_join1_vc2 ( + c1 int, + c2 varchar(20) +); + +create table varchar_join1_str ( + c1 int, + c2 string +); + +load data local inpath '../data/files/vc1.txt' into table varchar_join1_vc1; +load data local inpath '../data/files/vc1.txt' into table varchar_join1_vc2; +load data local inpath '../data/files/vc1.txt' into table varchar_join1_str; + +-- Join varchar with same length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc1 b on (a.c2 = b.c2) order by a.c1; + +-- Join varchar with different length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc2 b on (a.c2 = b.c2) order by a.c1; + +-- Join varchar with string +select * from varchar_join1_vc1 a join varchar_join1_str b on (a.c2 = b.c2) order by a.c1; + +drop table varchar_join1_vc1; +drop table varchar_join1_vc2; +drop table varchar_join1_str; Index: ql/src/test/queries/clientpositive/varchar_nested_types.q =================================================================== --- /dev/null +++ ql/src/test/queries/clientpositive/varchar_nested_types.q @@ -0,0 +1,53 @@ +drop table varchar_nested_1; +drop table varchar_nested_array; +drop table varchar_nested_map; +drop table varchar_nested_struct; +drop table varchar_nested_cta; +drop table varchar_nested_view; + +create table varchar_nested_1 (key int, value varchar(20)); +insert overwrite table varchar_nested_1 + select key, value from src limit 5; + +-- arrays +create table varchar_nested_array (c1 array<varchar(20)>); +insert overwrite table varchar_nested_array + select array(value, value) from varchar_nested_1; +describe varchar_nested_array; +select * from varchar_nested_array; + +-- maps +create table varchar_nested_map (c1 map<int, varchar(20)>); +insert overwrite table varchar_nested_map + select map(key, value) from varchar_nested_1; +describe varchar_nested_map; +select * from varchar_nested_map; + +-- structs +create table varchar_nested_struct (c1 struct<a:int, b:varchar(20), c:string>); +insert overwrite table
varchar_nested_struct + select named_struct('a', key, + 'b', value, + 'c', cast(value as string)) + from varchar_nested_1; +describe varchar_nested_struct; +select * from varchar_nested_struct; + +-- nested type with create table as +create table varchar_nested_cta as + select * from varchar_nested_struct; +describe varchar_nested_cta; +select * from varchar_nested_cta; + +-- nested type with view +create table varchar_nested_view as + select * from varchar_nested_struct; +describe varchar_nested_view; +select * from varchar_nested_view; + +drop table varchar_nested_1; +drop table varchar_nested_array; +drop table varchar_nested_map; +drop table varchar_nested_struct; +drop table varchar_nested_cta; +drop table varchar_nested_view; Index: ql/src/test/queries/clientpositive/varchar_union1.q =================================================================== --- /dev/null +++ ql/src/test/queries/clientpositive/varchar_union1.q @@ -0,0 +1,47 @@ +drop table varchar_union1_vc1; +drop table varchar_union1_vc2; +drop table varchar_union1_str; + +create table varchar_union1_vc1 ( + c1 int, + c2 varchar(10) +); + +create table varchar_union1_vc2 ( + c1 int, + c2 varchar(20) +); + +create table varchar_union1_str ( + c1 int, + c2 string +); + +load data local inpath '../data/files/vc1.txt' into table varchar_union1_vc1; +load data local inpath '../data/files/vc1.txt' into table varchar_union1_vc2; +load data local inpath '../data/files/vc1.txt' into table varchar_union1_str; + +-- union varchar with same length varchar +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_vc1 limit 1 +) q1; + +-- union varchar with different length varchar +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_vc2 limit 1 +) q1; + +-- union varchar with string +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_str limit 1 +) q1; + +drop table varchar_union1_vc1; +drop table varchar_union1_vc2; +drop table varchar_union1_str; Index: ql/src/test/results/clientpositive/varchar_1.q.out =================================================================== --- /dev/null +++ ql/src/test/results/clientpositive/varchar_1.q.out @@ -0,0 +1,196 @@ +PREHOOK: query: drop table varchar1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar1_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar1_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar1 (key varchar(10), value varchar(20)) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar1 (key varchar(10), value varchar(20)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar1 +PREHOOK: query: create table varchar1_1 (key string, value string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar1_1 (key string, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar1_1 +PREHOOK: query: -- load from file +load data local inpath '../data/files/srcbucket0.txt' overwrite into table varchar1 +PREHOOK: type: LOAD +PREHOOK: Output: default@varchar1 +POSTHOOK: query: -- load from file +load data local inpath '../data/files/srcbucket0.txt' overwrite into table varchar1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@varchar1 +PREHOOK: query: select * from varchar1 limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar1 +#### A masked pattern was here #### +POSTHOOK: query: select * from varchar1 limit 2 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar1 +#### A masked pattern was here #### +474 val_475 +62 val_63 +PREHOOK: query: -- insert overwrite, from same/different length varchar +insert overwrite table varchar1 + select cast(key as varchar(10)), cast(value as varchar(15)) from src limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar1 +POSTHOOK: query: -- insert overwrite, from same/different length varchar +insert overwrite table varchar1 + select cast(key as varchar(10)), cast(value as varchar(15)) from src limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar1 +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, value from varchar1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from varchar1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 +86 val_86 +PREHOOK: query: -- insert overwrite, from string +insert overwrite table varchar1 + select key, value from src limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar1 +POSTHOOK: query: -- insert overwrite, from string +insert overwrite table varchar1 + select key, value from src limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar1 +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, value from varchar1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from varchar1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +238 val_238 +86 val_86 +PREHOOK: query: -- insert string from varchar +insert overwrite table varchar1_1 + select key, value from varchar1 limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar1 +PREHOOK: Output: default@varchar1_1 +POSTHOOK: query: -- insert string from varchar +insert overwrite table varchar1_1 + select key, value from varchar1 limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar1 +POSTHOOK: Output: default@varchar1_1 +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1_1.key SIMPLE [(varchar1)varchar1.FieldSchema(name:key, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: varchar1_1.value SIMPLE [(varchar1)varchar1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: select key, value from varchar1_1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar1_1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from varchar1_1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar1_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1_1.key SIMPLE [(varchar1)varchar1.FieldSchema(name:key, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: varchar1_1.value SIMPLE [(varchar1)varchar1.FieldSchema(name:value, type:varchar(20), comment:null), ] +238 val_238 +86 val_86 +PREHOOK: query: -- respect string length +insert overwrite table varchar1 + select key, cast(value as varchar(3)) from src limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar1 +POSTHOOK: query: -- respect string length +insert overwrite table varchar1 + select key, cast(value as varchar(3)) from src limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar1 +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1_1.key SIMPLE [(varchar1)varchar1.FieldSchema(name:key, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: varchar1_1.value SIMPLE [(varchar1)varchar1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: select key, value from varchar1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar1 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from varchar1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar1 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1_1.key SIMPLE [(varchar1)varchar1.FieldSchema(name:key, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: varchar1_1.value SIMPLE [(varchar1)varchar1.FieldSchema(name:value, type:varchar(20), comment:null), ] +238 val +86 val +PREHOOK: query: drop table varchar1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar1 +PREHOOK: Output: default@varchar1 +POSTHOOK: query: drop table varchar1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar1 +POSTHOOK: Output: default@varchar1 +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1_1.key SIMPLE [(varchar1)varchar1.FieldSchema(name:key, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: varchar1_1.value SIMPLE [(varchar1)varchar1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: drop table varchar1_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar1_1 +PREHOOK: Output: default@varchar1_1 +POSTHOOK: query: drop table varchar1_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar1_1 +POSTHOOK: Output: default@varchar1_1 +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar1_1.key SIMPLE [(varchar1)varchar1.FieldSchema(name:key, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: varchar1_1.value SIMPLE [(varchar1)varchar1.FieldSchema(name:value, type:varchar(20), comment:null), ] Index: ql/src/test/results/clientpositive/varchar_2.q.out =================================================================== --- /dev/null +++ ql/src/test/results/clientpositive/varchar_2.q.out @@ -0,0 +1,131 @@ +PREHOOK: query: drop table varchar_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_2 ( + key varchar(10), + value varchar(20) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_2 ( + key varchar(10), + value varchar(20) +) +POSTHOOK: type: 
CREATETABLE +POSTHOOK: Output: default@varchar_2 +PREHOOK: query: insert overwrite table varchar_2 select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@varchar_2 +POSTHOOK: query: insert overwrite table varchar_2 select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@varchar_2 +POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_0 0 3 +val_10 10 1 +val_100 200 2 +val_103 206 2 +val_104 208 2 +PREHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from varchar_2 +group by value +order by value asc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from varchar_2 +group by value +order by value asc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_0 0 3 +val_10 10 1 +val_100 200 2 +val_103 206 2 +val_104 208 2 +PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows +from src +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from varchar_2 +group by value +order by value desc +limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_2 +#### A masked pattern was here #### +POSTHOOK: query: -- should match the query from src +select value, sum(cast(key as int)), count(*) numrows +from varchar_2 +group by value +order by value desc +limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_2.value EXPRESSION 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +val_98 196 2 +val_97 194 2 +val_96 96 1 +val_95 190 2 +val_92 92 1 +PREHOOK: query: drop table varchar_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_2 +PREHOOK: Output: default@varchar_2 +POSTHOOK: query: drop table varchar_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_2 +POSTHOOK: Output: default@varchar_2 +POSTHOOK: Lineage: varchar_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] Index: ql/src/test/results/clientpositive/varchar_comparison.q.out =================================================================== --- /dev/null +++ ql/src/test/results/clientpositive/varchar_comparison.q.out @@ -0,0 +1,105 @@ +PREHOOK: query: -- Should all be true +select + cast('abc' as varchar(10)) = cast('abc' as varchar(10)), + cast('abc' as varchar(10)) <= cast('abc' as varchar(10)), + cast('abc' as varchar(10)) >= cast('abc' as varchar(10)), + cast('abc' as varchar(10)) < cast('abd' as varchar(10)), + cast('abc' as varchar(10)) > cast('abb' as varchar(10)), + cast('abc' as varchar(10)) <> cast('abb' as varchar(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Should all be true +select + cast('abc' as varchar(10)) = cast('abc' as varchar(10)), + cast('abc' as varchar(10)) <= cast('abc' as varchar(10)), + cast('abc' as varchar(10)) >= cast('abc' as varchar(10)), + cast('abc' as varchar(10)) < cast('abd' as varchar(10)), + cast('abc' as varchar(10)) > cast('abb' as varchar(10)), + cast('abc' as varchar(10)) <> cast('abb' as varchar(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true true true true true true +PREHOOK: query: -- Different varchar lengths should still compare the same +select + cast('abc' as varchar(10)) = cast('abc' as varchar(3)), + cast('abc' as varchar(10)) <= cast('abc' as varchar(3)), + cast('abc' as varchar(10)) >= cast('abc' as varchar(3)), + cast('abc' as varchar(10)) < cast('abd' as varchar(3)), + cast('abc' as varchar(10)) > cast('abb' as varchar(3)), + cast('abc' as varchar(10)) <> cast('abb' as varchar(3)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Different varchar lengths should still compare the same +select + cast('abc' as varchar(10)) = cast('abc' as varchar(3)), + cast('abc' as varchar(10)) <= cast('abc' as varchar(3)), + cast('abc' as varchar(10)) >= cast('abc' as varchar(3)), + cast('abc' as varchar(10)) < cast('abd' as varchar(3)), + cast('abc' as varchar(10)) > cast('abb' as varchar(3)), + cast('abc' as varchar(10)) <> cast('abb' as varchar(3)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true true true true true true +PREHOOK: query: -- Should work with string types as well +select + cast('abc' as varchar(10)) = 'abc', + cast('abc' as varchar(10)) <= 'abc', + cast('abc' as varchar(10)) >= 'abc', + cast('abc' as varchar(10)) < 'abd', + cast('abc' as varchar(10)) > 'abb', + cast('abc' as varchar(10)) <> 'abb' +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- Should work with string types as well +select + cast('abc' as varchar(10)) = 'abc', + cast('abc' as 
varchar(10)) <= 'abc', + cast('abc' as varchar(10)) >= 'abc', + cast('abc' as varchar(10)) < 'abd', + cast('abc' as varchar(10)) > 'abb', + cast('abc' as varchar(10)) <> 'abb' +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true true true true true true +PREHOOK: query: -- leading space is significant for varchar +select + cast(' abc' as varchar(10)) <> cast('abc' as varchar(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- leading space is significant for varchar +select + cast(' abc' as varchar(10)) <> cast('abc' as varchar(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true +PREHOOK: query: -- trailing space is significant for varchar +select + cast('abc ' as varchar(10)) <> cast('abc' as varchar(10)) +from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- trailing space is significant for varchar +select + cast('abc ' as varchar(10)) <> cast('abc' as varchar(10)) +from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +true Index: ql/src/test/results/clientpositive/varchar_join1.q.out =================================================================== --- /dev/null +++ ql/src/test/results/clientpositive/varchar_join1.q.out @@ -0,0 +1,130 @@ +PREHOOK: query: drop table varchar_join1_vc1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_join1_vc1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_join1_vc2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_join1_vc2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_join1_str +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_join1_str +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_join1_vc1 ( + c1 int, + c2 varchar(10) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_join1_vc1 ( + c1 int, + c2 varchar(10) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_join1_vc1 +PREHOOK: query: create table varchar_join1_vc2 ( + c1 int, + c2 varchar(20) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_join1_vc2 ( + c1 int, + c2 varchar(20) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_join1_vc2 +PREHOOK: query: create table varchar_join1_str ( + c1 int, + c2 string +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_join1_str ( + c1 int, + c2 string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_join1_str +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_join1_vc1 +PREHOOK: type: LOAD +PREHOOK: Output: default@varchar_join1_vc1 +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_join1_vc1 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@varchar_join1_vc1 +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_join1_vc2 +PREHOOK: type: LOAD +PREHOOK: Output: default@varchar_join1_vc2 +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_join1_vc2 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@varchar_join1_vc2 +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_join1_str +PREHOOK: type: LOAD +PREHOOK: Output: default@varchar_join1_str +POSTHOOK: query: load data local inpath 
'../data/files/vc1.txt' into table varchar_join1_str +POSTHOOK: type: LOAD +POSTHOOK: Output: default@varchar_join1_str +PREHOOK: query: -- Join varchar with same length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc1 b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_join1_vc1 +#### A masked pattern was here #### +POSTHOOK: query: -- Join varchar with same length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc1 b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_join1_vc1 +#### A masked pattern was here #### +1 abc 1 abc +2 abc 2 abc +3 abc 3 abc +PREHOOK: query: -- Join varchar with different length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc2 b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_join1_vc1 +PREHOOK: Input: default@varchar_join1_vc2 +#### A masked pattern was here #### +POSTHOOK: query: -- Join varchar with different length varchar +select * from varchar_join1_vc1 a join varchar_join1_vc2 b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_join1_vc1 +POSTHOOK: Input: default@varchar_join1_vc2 +#### A masked pattern was here #### +1 abc 1 abc +2 abc 2 abc +3 abc 3 abc +PREHOOK: query: -- Join varchar with string +select * from varchar_join1_vc1 a join varchar_join1_str b on (a.c2 = b.c2) order by a.c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_join1_str +PREHOOK: Input: default@varchar_join1_vc1 +#### A masked pattern was here #### +POSTHOOK: query: -- Join varchar with string +select * from varchar_join1_vc1 a join varchar_join1_str b on (a.c2 = b.c2) order by a.c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_join1_str +POSTHOOK: Input: default@varchar_join1_vc1 +#### A masked pattern was here #### +1 abc 1 abc +2 abc 2 abc +3 abc 3 abc +PREHOOK: query: drop table varchar_join1_vc1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_join1_vc1 +PREHOOK: Output: default@varchar_join1_vc1 +POSTHOOK: query: drop table varchar_join1_vc1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_join1_vc1 +POSTHOOK: Output: default@varchar_join1_vc1 +PREHOOK: query: drop table varchar_join1_vc2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_join1_vc2 +PREHOOK: Output: default@varchar_join1_vc2 +POSTHOOK: query: drop table varchar_join1_vc2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_join1_vc2 +POSTHOOK: Output: default@varchar_join1_vc2 +PREHOOK: query: drop table varchar_join1_str +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_join1_str +PREHOOK: Output: default@varchar_join1_str +POSTHOOK: query: drop table varchar_join1_str +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_join1_str +POSTHOOK: Output: default@varchar_join1_str Index: ql/src/test/results/clientpositive/varchar_nested_types.q.out =================================================================== --- /dev/null +++ ql/src/test/results/clientpositive/varchar_nested_types.q.out @@ -0,0 +1,363 @@ +PREHOOK: query: drop table varchar_nested_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_nested_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_nested_array +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_nested_array +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_nested_map +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_nested_map +POSTHOOK: type: DROPTABLE 
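(Editor's aside, not part of the golden test output around it: the comparison and join results above hinge on varchar comparisons being value-based. The declared maximum length never participates, which is why a varchar(10) and a varchar(20) column holding "abc" compare equal, while a leading or trailing space makes values distinct. A minimal standalone sketch of that behavior against the HiveVarchar class this patch introduces; the demo class name is illustrative only.

import org.apache.hadoop.hive.common.type.HiveVarchar;

public class VarcharComparisonDemo {
  public static void main(String[] args) {
    // Declared lengths differ (10 vs 20), but only the string value is compared.
    HiveVarchar vc10 = new HiveVarchar("abc", 10);
    HiveVarchar vc20 = new HiveVarchar("abc", 20);
    System.out.println(vc10.compareTo(vc20)); // 0
    System.out.println(vc10.equals(vc20));    // true

    // Trailing spaces are significant for varchar, matching the <> results
    // in varchar_comparison.q.out above.
    HiveVarchar padded = new HiveVarchar("abc ", 10);
    System.out.println(padded.equals(vc10));  // false
  }
}

End of aside; the varchar_nested_types.q.out golden file continues below.)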
+PREHOOK: query: drop table varchar_nested_struct
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table varchar_nested_struct
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table varchar_nested_cta
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table varchar_nested_cta
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table varchar_nested_view
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table varchar_nested_view
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table varchar_nested_1 (key int, value varchar(20))
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table varchar_nested_1 (key int, value varchar(20))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_nested_1
+PREHOOK: query: insert overwrite table varchar_nested_1
+ select key, value from src limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@varchar_nested_1
+POSTHOOK: query: insert overwrite table varchar_nested_1
+ select key, value from src limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@varchar_nested_1
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: -- arrays
+create table varchar_nested_array (c1 array<varchar(20)>)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- arrays
+create table varchar_nested_array (c1 array<varchar(20)>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_nested_array
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: insert overwrite table varchar_nested_array
+ select array(value, value) from varchar_nested_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_nested_1
+PREHOOK: Output: default@varchar_nested_array
+POSTHOOK: query: insert overwrite table varchar_nested_array
+ select array(value, value) from varchar_nested_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_nested_1
+POSTHOOK: Output: default@varchar_nested_array
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+PREHOOK: query: describe varchar_nested_array
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe varchar_nested_array
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+c1 array<varchar(20)> None
+PREHOOK: query: select * from varchar_nested_array
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_nested_array
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_nested_array
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_nested_array
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+["val_238","val_238"]
+["val_86","val_86"]
+["val_311","val_311"]
+["val_27","val_27"]
+["val_165","val_165"]
+PREHOOK: query: -- maps
+create table varchar_nested_map (c1 map<int,varchar(20)>)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- maps
+create table varchar_nested_map (c1 map<int,varchar(20)>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_nested_map
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+PREHOOK: query: insert overwrite table varchar_nested_map
+ select map(key, value) from varchar_nested_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_nested_1
+PREHOOK: Output: default@varchar_nested_map
+POSTHOOK: query: insert overwrite table varchar_nested_map
+ select map(key, value) from varchar_nested_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_nested_1
+POSTHOOK: Output: default@varchar_nested_map
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+PREHOOK: query: describe varchar_nested_map
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe varchar_nested_map
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+c1 map<int,varchar(20)> None
+PREHOOK: query: select * from varchar_nested_map
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_nested_map
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_nested_map
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_nested_map
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+{238:"val_238"}
+{86:"val_86"}
+{311:"val_311"}
+{27:"val_27"}
+{165:"val_165"}
+PREHOOK: query: -- structs
+create table varchar_nested_struct (c1 struct<a:int,b:varchar(20),c:string>)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- structs
+create table varchar_nested_struct (c1 struct<a:int,b:varchar(20),c:string>)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@varchar_nested_struct
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+PREHOOK: query: insert overwrite table varchar_nested_struct
+ select named_struct('a', key,
+ 'b', value,
+ 'c', cast(value as string))
+ from varchar_nested_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_nested_1
+PREHOOK: Output: default@varchar_nested_struct
+POSTHOOK: query: insert overwrite table varchar_nested_struct
+ select named_struct('a', key,
+ 'b', value,
+ 'c', cast(value as string))
+ from varchar_nested_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_nested_1
+POSTHOOK: Output: default@varchar_nested_struct
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+PREHOOK: query: describe varchar_nested_struct
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe varchar_nested_struct
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+c1 struct<a:int,b:varchar(20),c:string> None
+PREHOOK: query: select * from varchar_nested_struct
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_nested_struct
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_nested_struct
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_nested_struct
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+{"a":238,"b":"val_238","c":"val_238"}
+{"a":86,"b":"val_86","c":"val_86"}
+{"a":311,"b":"val_311","c":"val_311"}
+{"a":27,"b":"val_27","c":"val_27"}
+{"a":165,"b":"val_165","c":"val_165"}
+PREHOOK: query: -- nested type with create table as
+create table varchar_nested_cta as
+ select * from varchar_nested_struct
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@varchar_nested_struct
+POSTHOOK: query: -- nested type with create table as
+create table varchar_nested_cta as
+ select * from varchar_nested_struct
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@varchar_nested_struct
+POSTHOOK: Output: default@varchar_nested_cta
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+PREHOOK: query: describe varchar_nested_cta
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe varchar_nested_cta
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+c1 struct<a:int,b:varchar(20),c:string> None
+PREHOOK: query: select * from varchar_nested_cta
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_nested_cta
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_nested_cta
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_nested_cta
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+{"a":238,"b":"val_238","c":"val_238"}
+{"a":86,"b":"val_86","c":"val_86"}
+{"a":311,"b":"val_311","c":"val_311"}
+{"a":27,"b":"val_27","c":"val_27"}
+{"a":165,"b":"val_165","c":"val_165"}
+PREHOOK: query: -- nested type with view
+create table varchar_nested_view as
+ select * from varchar_nested_struct
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@varchar_nested_struct
+POSTHOOK: query: -- nested type with view
+create table varchar_nested_view as
+ select * from varchar_nested_struct
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@varchar_nested_struct
+POSTHOOK: Output: default@varchar_nested_view
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+PREHOOK: query: describe varchar_nested_view
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe varchar_nested_view
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+c1 struct<a:int,b:varchar(20),c:string> None
+PREHOOK: query: select * from varchar_nested_view
+PREHOOK: type: QUERY
+PREHOOK: Input: default@varchar_nested_view
+#### A masked pattern was here ####
+POSTHOOK: query: select * from varchar_nested_view
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@varchar_nested_view
+#### A masked pattern was here ####
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+{"a":238,"b":"val_238","c":"val_238"}
+{"a":86,"b":"val_86","c":"val_86"}
+{"a":311,"b":"val_311","c":"val_311"}
+{"a":27,"b":"val_27","c":"val_27"}
+{"a":165,"b":"val_165","c":"val_165"}
+PREHOOK: query: drop table varchar_nested_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_nested_1
+PREHOOK: Output: default@varchar_nested_1
+POSTHOOK: query: drop table varchar_nested_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_nested_1
+POSTHOOK: Output: default@varchar_nested_1
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ]
+PREHOOK: query: drop table varchar_nested_array
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@varchar_nested_array
+PREHOOK: Output: default@varchar_nested_array
+POSTHOOK: query: drop table varchar_nested_array
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@varchar_nested_array
+POSTHOOK: Output: default@varchar_nested_array
+POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), 
] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: drop table varchar_nested_map +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_nested_map +PREHOOK: Output: default@varchar_nested_map +POSTHOOK: query: drop table varchar_nested_map +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_nested_map +POSTHOOK: Output: default@varchar_nested_map +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: drop table varchar_nested_struct +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_nested_struct +PREHOOK: Output: default@varchar_nested_struct +POSTHOOK: query: drop table varchar_nested_struct +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_nested_struct +POSTHOOK: Output: default@varchar_nested_struct +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: drop table varchar_nested_cta +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_nested_cta +PREHOOK: Output: default@varchar_nested_cta +POSTHOOK: query: drop table varchar_nested_cta +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_nested_cta +POSTHOOK: Output: default@varchar_nested_cta +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), 
] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +PREHOOK: query: drop table varchar_nested_view +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_nested_view +PREHOOK: Output: default@varchar_nested_view +POSTHOOK: query: drop table varchar_nested_view +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_nested_view +POSTHOOK: Output: default@varchar_nested_view +POSTHOOK: Lineage: varchar_nested_1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: varchar_nested_array.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_map.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] +POSTHOOK: Lineage: varchar_nested_struct.c1 EXPRESSION [(varchar_nested_1)varchar_nested_1.FieldSchema(name:key, type:int, comment:null), (varchar_nested_1)varchar_nested_1.FieldSchema(name:value, type:varchar(20), comment:null), ] Index: ql/src/test/results/clientpositive/varchar_union1.q.out =================================================================== --- /dev/null +++ ql/src/test/results/clientpositive/varchar_union1.q.out @@ -0,0 +1,157 @@ +PREHOOK: query: drop table varchar_union1_vc1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_union1_vc1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_union1_vc2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_union1_vc2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table varchar_union1_str +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table varchar_union1_str +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table varchar_union1_vc1 ( + c1 int, + c2 varchar(10) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_union1_vc1 ( + c1 int, + c2 varchar(10) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_union1_vc1 +PREHOOK: query: create table varchar_union1_vc2 ( + c1 int, + c2 varchar(20) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_union1_vc2 ( + c1 int, + c2 varchar(20) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_union1_vc2 +PREHOOK: query: create table varchar_union1_str ( + c1 int, + c2 string +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table varchar_union1_str ( + c1 int, + c2 string +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@varchar_union1_str +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_union1_vc1 +PREHOOK: type: LOAD +PREHOOK: Output: default@varchar_union1_vc1 +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_union1_vc1 +POSTHOOK: type: LOAD +POSTHOOK: Output: 
default@varchar_union1_vc1 +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_union1_vc2 +PREHOOK: type: LOAD +PREHOOK: Output: default@varchar_union1_vc2 +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_union1_vc2 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@varchar_union1_vc2 +PREHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_union1_str +PREHOOK: type: LOAD +PREHOOK: Output: default@varchar_union1_str +POSTHOOK: query: load data local inpath '../data/files/vc1.txt' into table varchar_union1_str +POSTHOOK: type: LOAD +POSTHOOK: Output: default@varchar_union1_str +PREHOOK: query: -- union varchar with same length varchar +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_vc1 limit 1 +) q1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_union1_vc1 +#### A masked pattern was here #### +POSTHOOK: query: -- union varchar with same length varchar +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_vc1 limit 1 +) q1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_union1_vc1 +#### A masked pattern was here #### +1 abc +1 abc +2 abc +3 abc +PREHOOK: query: -- union varchar with different length varchar +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_vc2 limit 1 +) q1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_union1_vc1 +PREHOOK: Input: default@varchar_union1_vc2 +#### A masked pattern was here #### +POSTHOOK: query: -- union varchar with different length varchar +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_vc2 limit 1 +) q1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_union1_vc1 +POSTHOOK: Input: default@varchar_union1_vc2 +#### A masked pattern was here #### +1 abc +1 abc +2 abc +3 abc +PREHOOK: query: -- union varchar with string +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_str limit 1 +) q1 +PREHOOK: type: QUERY +PREHOOK: Input: default@varchar_union1_str +PREHOOK: Input: default@varchar_union1_vc1 +#### A masked pattern was here #### +POSTHOOK: query: -- union varchar with string +select * from ( + select * from varchar_union1_vc1 + union all + select * from varchar_union1_str limit 1 +) q1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@varchar_union1_str +POSTHOOK: Input: default@varchar_union1_vc1 +#### A masked pattern was here #### +1 abc +1 abc +2 abc +3 abc +PREHOOK: query: drop table varchar_union1_vc1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_union1_vc1 +PREHOOK: Output: default@varchar_union1_vc1 +POSTHOOK: query: drop table varchar_union1_vc1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_union1_vc1 +POSTHOOK: Output: default@varchar_union1_vc1 +PREHOOK: query: drop table varchar_union1_vc2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_union1_vc2 +PREHOOK: Output: default@varchar_union1_vc2 +POSTHOOK: query: drop table varchar_union1_vc2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_union1_vc2 +POSTHOOK: Output: default@varchar_union1_vc2 +PREHOOK: query: drop table varchar_union1_str +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@varchar_union1_str +PREHOOK: Output: default@varchar_union1_str +POSTHOOK: query: drop table varchar_union1_str +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@varchar_union1_str +POSTHOOK: Output: 
default@varchar_union1_str Index: serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java =================================================================== --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java @@ -79,6 +79,8 @@ public static final String STRING_TYPE_NAME = "string"; + public static final String VARCHAR_TYPE_NAME = "varchar"; + public static final String DATE_TYPE_NAME = "date"; public static final String DATETIME_TYPE_NAME = "datetime"; @@ -112,6 +114,7 @@ PrimitiveTypes.add("float"); PrimitiveTypes.add("double"); PrimitiveTypes.add("string"); + PrimitiveTypes.add("varchar"); PrimitiveTypes.add("date"); PrimitiveTypes.add("datetime"); PrimitiveTypes.add("timestamp"); Index: serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; @@ -284,6 +285,13 @@ sb.append('"'); break; } + case VARCHAR: { + sb.append('"'); + sb.append(escapeString(((HiveVarcharObjectInspector) poi) + .getPrimitiveJavaObject(o).toString())); + sb.append('"'); + break; + } case DATE: { sb.append('"'); sb.append(((DateObjectInspector) poi) Index: serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -92,7 +93,8 @@ case PRIMITIVE: PrimitiveTypeInfo pti = (PrimitiveTypeInfo)ti; result = PrimitiveObjectInspectorFactory - .getPrimitiveJavaObjectInspector(pti.getPrimitiveCategory()); + .getPrimitiveJavaObjectInspector(pti.getPrimitiveCategory(), + ParameterizedPrimitiveTypeUtils.getTypeParamsFromTypeInfo(pti)); break; case STRUCT: StructTypeInfo sti = (StructTypeInfo)ti; Index: serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java 
@@ -20,6 +20,8 @@ import java.io.IOException; import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; @@ -31,14 +33,16 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -56,14 +60,19 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils.HiveVarcharSerDeHelper; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -276,48 +285,18 @@ } case STRING: { Text r = reuse == null ? new Text() : (Text) reuse; - // Get the actual length first - int start = buffer.tell(); - int length = 0; - do { - byte b = buffer.read(invert); - if (b == 0) { - // end of string - break; - } - if (b == 1) { - // the last char is an escape char. read the actual char - buffer.read(invert); - } - length++; - } while (true); + return deserializeText(buffer, invert, r); + } - if (length == buffer.tell() - start) { - // No escaping happened, so we are already done. - r.set(buffer.getData(), start, length); - } else { - // Escaping happened, we need to copy byte-by-byte. - // 1. Set the length first. - r.set(buffer.getData(), start, length); - // 2. Reset the pointer. - buffer.seek(start); - // 3. Copy the data. 
-        byte[] rdata = r.getBytes();
-        for (int i = 0; i < length; i++) {
-          byte b = buffer.read(invert);
-          if (b == 1) {
-            // The last char is an escape char, read the actual char.
-            // The serialization format escape \0 to \1, and \1 to \2,
-            // to make sure the string is null-terminated.
-            b = (byte) (buffer.read(invert) - 1);
-          }
-          rdata[i] = b;
-        }
-        // 4. Read the null terminator.
-        byte b = buffer.read(invert);
-        assert (b == 0);
-      }
-      return r;
+    case VARCHAR: {
+      HiveVarcharWritable r =
+          reuse == null ? new HiveVarcharWritable() : (HiveVarcharWritable) reuse;
+      // Use HiveVarchar's internal Text member to read the value.
+      deserializeText(buffer, invert, r.getTextValue());
+      // If we cache helper data for deserialization we could avoid having
+      // to call getVarcharMaxLength() on every deserialize call.
+      r.enforceMaxLength(getVarcharMaxLength(type));
+      return r;
    }

    case BINARY: {
@@ -552,6 +531,61 @@
    return v;
  }

+  static int getVarcharMaxLength(TypeInfo type) {
+    if (type instanceof ParameterizedPrimitiveTypeInfo) {
+      VarcharTypeParams typeParams =
+          (VarcharTypeParams) ((ParameterizedPrimitiveTypeInfo) type).getParameters();
+      // Guard against type info that carries no parameters; fall through to -1
+      // (no length enforcement) rather than dereferencing null.
+      if (typeParams != null) {
+        return typeParams.length;
+      }
+    }
+    return -1;
+  }
+
+  static Text deserializeText(InputByteBuffer buffer, boolean invert, Text r)
+      throws IOException {
+    // Get the actual length first
+    int start = buffer.tell();
+    int length = 0;
+    do {
+      byte b = buffer.read(invert);
+      if (b == 0) {
+        // end of string
+        break;
+      }
+      if (b == 1) {
+        // the last char is an escape char. read the actual char
+        buffer.read(invert);
+      }
+      length++;
+    } while (true);
+
+    if (length == buffer.tell() - start) {
+      // No escaping happened, so we are already done.
+      r.set(buffer.getData(), start, length);
+    } else {
+      // Escaping happened, we need to copy byte-by-byte.
+      // 1. Set the length first.
+      r.set(buffer.getData(), start, length);
+      // 2. Reset the pointer.
+      buffer.seek(start);
+      // 3. Copy the data.
+      byte[] rdata = r.getBytes();
+      for (int i = 0; i < length; i++) {
+        byte b = buffer.read(invert);
+        if (b == 1) {
+          // The last char is an escape char, read the actual char.
+          // The serialization format escapes \0 to \1, and \1 to \2,
+          // to make sure the string is null-terminated.
+          b = (byte) (buffer.read(invert) - 1);
+        }
+        rdata[i] = b;
+      }
+      // 4. Read the null terminator.
+      byte b = buffer.read(invert);
+      assert (b == 0);
+    }
+    return r;
+  }
+
  BytesWritable serializeBytesWritable = new BytesWritable();
  OutputByteBuffer outputByteBuffer = new OutputByteBuffer();
@@ -572,7 +606,7 @@
  }

  static void serialize(OutputByteBuffer buffer, Object o, ObjectInspector oi,
-      boolean invert) {
+      boolean invert) throws SerDeException {
    // Is this field a null? 
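    // Editor's note (not part of the original patch): serialize() writes each
    // field with a leading null marker, 0 for NULL and otherwise what appears
    // in this serde to be 1, followed by the field body. For string-like
    // fields the body is the escaped byte stream that deserializeText() above
    // undoes: 0x00 in the data is written as 0x01 0x01 and 0x01 as 0x01 0x02,
    // so the 0x00 terminator can never collide with real data, and a plain
    // byte-wise compare of two serialized rows reproduces string ordering.
    // For example, a non-null value "a\0b" would be written, before any
    // inversion, roughly as:
    //   0x01  'a'  0x01 0x01  'b'  0x00
    // When invert is set, all bytes are bit-flipped so the same comparison
    // yields descending order.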
if (o == null) { buffer.write((byte) 0, invert); @@ -668,6 +702,18 @@ return; } + case VARCHAR: { + HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector)poi; + HiveVarcharWritable hc = hcoi.getPrimitiveWritableObject(o); + try { + ByteBuffer bb = Text.encode(hc.getHiveVarchar().getValue()); + serializeBytes(buffer, bb.array(), bb.limit(), invert); + } catch (CharacterCodingException err) { + throw new SerDeException(err); + } + return; + } + case BINARY: { BinaryObjectInspector baoi = (BinaryObjectInspector) poi; BytesWritable ba = baoi.getPrimitiveWritableObject(o); Index: serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDe.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDe.java @@ -35,8 +35,10 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; import org.apache.hadoop.hive.serde2.thrift.ConfigurableTProtocol; import org.apache.hadoop.hive.serde2.thrift.TReflectionUtils; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; @@ -173,9 +175,11 @@ dynamicSerDeStructBaseToObjectInspector(btMap.getKeyType()), dynamicSerDeStructBaseToObjectInspector(btMap.getValueType())); } else if (bt.isPrimitive()) { + PrimitiveTypeEntry pte = PrimitiveObjectInspectorUtils + .getTypeEntryFromPrimitiveJavaClass(bt.getRealType()); return PrimitiveObjectInspectorFactory - .getPrimitiveJavaObjectInspector(PrimitiveObjectInspectorUtils - .getTypeEntryFromPrimitiveJavaClass(bt.getRealType()).primitiveCategory); + .getPrimitiveJavaObjectInspector(pte.primitiveCategory, + ParameterizedPrimitiveTypeUtils.getTypeParamsFromPrimitiveTypeEntry(pte)); } else { // Must be a struct DynamicSerDeStructBase btStruct = (DynamicSerDeStructBase) bt; Index: serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/DoubleWritable.java @@ -17,7 +17,7 @@ */ /** - * This file is back-ported from hadoop-0.19, to make sure hive can run + * This file is back-ported from hadoop-0.19, to make sure hive can run * with hadoop-0.17. */ package org.apache.hadoop.hive.serde2.io; Index: serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java =================================================================== --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java @@ -0,0 +1,125 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hive.common.type.HiveBaseChar;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.shims.ShimLoader;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.WritableComparable;
+
+public class HiveVarcharWritable implements WritableComparable<HiveVarcharWritable> {
+  protected Text value = new Text();
+  protected transient int characterLength = -1;
+
+  public HiveVarcharWritable() {
+  }
+
+  public HiveVarcharWritable(HiveVarchar hc) {
+    set(hc);
+  }
+
+  public HiveVarcharWritable(HiveVarcharWritable hcw) {
+    set(hcw);
+  }
+
+  public void set(HiveVarchar val) {
+    set(val.getValue());
+  }
+
+  public void set(String val) {
+    set(val, -1);  // copy entire string value
+  }
+
+  public void set(HiveVarcharWritable val) {
+    value.set(val.value);
+    characterLength = val.characterLength;
+  }
+
+  public void set(HiveVarcharWritable val, int maxLength) {
+    if (maxLength < 0
+        || (val.characterLength > 0 && val.characterLength <= maxLength)) {
+      // No length enforcement required, or the source value already fits
+      // within the max length, so it can be copied as-is.
+      set(val);
+    } else {
+      set(val.getHiveVarchar(), maxLength);
+    }
+  }
+
+  public void set(HiveVarchar val, int len) {
+    set(val.getValue(), len);
+  }
+
+  public void set(String val, int maxLength) {
+    // Invalidate the cached length of any previous value.
+    characterLength = -1;
+    value.set(HiveBaseChar.enforceMaxLength(val, maxLength));
+  }
+
+  public HiveVarchar getHiveVarchar() {
+    return new HiveVarchar(value.toString(), -1);
+  }
+
+  public int getCharacterLength() {
+    if (characterLength < 0) {
+      characterLength = getHiveVarchar().getCharacterLength();
+    }
+    return characterLength;
+  }
+
+  public void enforceMaxLength(int maxLength) {
+    // Might be possible to truncate the existing Text value, for now just do something simple.
+    set(getHiveVarchar(), maxLength);
+  }
+
+  public void readFields(DataInput in) throws IOException {
+    value.readFields(in);
+  }
+
+  public void write(DataOutput out) throws IOException {
+    value.write(out);
+  }
+
+  public int compareTo(HiveVarcharWritable rhs) {
+    return ShimLoader.getHadoopShims().compareText(value, rhs.value);
+  }
+
+  public boolean equals(Object obj) {
+    if (obj == null || !(obj instanceof HiveVarcharWritable)) {
+      return false;
+    }
+    return value.equals(((HiveVarcharWritable)obj).value);
+  }
+
+  @Override
+  public String toString() {
+    return value.toString();
+  }
+
+  public int hashCode() {
+    return value.hashCode();
+  }
+
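+  /*
+   * Editor's note (not part of the original patch): a sketch of how this
+   * writable is typically used, based on the methods above.
+   *
+   *   HiveVarcharWritable w = new HiveVarcharWritable();
+   *   w.set("hello world");    // stored unrestricted; max length of -1
+   *   w.enforceMaxLength(5);   // value becomes "hello"
+   *   w.getCharacterLength();  // 5, computed lazily and then cached
+   *
+   * Because enforceMaxLength() round-trips through HiveVarchar, truncation
+   * counts Unicode code points rather than UTF-16 units, so supplementary
+   * characters are not split in half.
+   */
+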
+  /**
+   * Access to the internal Text member. Use with care.
+   * @return the backing Text object
+   */
+  public Text getTextValue() {
+    return value;
+  }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
@@ -33,6 +33,7 @@
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyByteObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyFloatObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyLongObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory;
@@ -53,8 +54,10 @@
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -112,6 +115,8 @@
      return new LazyDouble((LazyDoubleObjectInspector) oi);
    case STRING:
      return new LazyString((LazyStringObjectInspector) oi);
+    case VARCHAR:
+      return new LazyHiveVarchar((LazyHiveVarcharObjectInspector) oi);
    case DATE:
      return new LazyDate((LazyDateObjectInspector) oi);
    case TIMESTAMP:
@@ -215,8 +220,12 @@
    ObjectInspector.Category c = typeInfo.getCategory();
    switch (c) {
    case PRIMITIVE:
+      BaseTypeParams typeParams = null;
+      if (typeInfo instanceof ParameterizedPrimitiveTypeInfo) {
+        typeParams = ((ParameterizedPrimitiveTypeInfo)typeInfo).getParameters();
+      }
      return LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(
-          ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(), escaped,
+          ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(), typeParams, escaped,
          escapeChar);
    case MAP:
      return LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(
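(Editor's aside between the two files: the LazyFactory changes above thread a column's type parameters from its TypeInfo into the lazy ObjectInspector, which is how LazyHiveVarchar below learns its maximum length. A hedged sketch of that wiring follows; the no-arg VarcharTypeParams constructor and the public length field are assumptions inferred from how the class is used elsewhere in this patch, and the demo class name is illustrative.

import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;

public class LazyVarcharWiringSketch {
  public static void main(String[] args) {
    // Assumed construction: BinarySortableSerDe reads typeParams.length
    // directly, so the field is taken here to be publicly writable too.
    VarcharTypeParams params = new VarcharTypeParams();
    params.length = 10;

    // The same factory call the PRIMITIVE branch above performs, with
    // escaping disabled and the escape byte therefore unused.
    ObjectInspector oi = LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(
        PrimitiveCategory.VARCHAR, params, false, (byte) 0);

    // createLazyObject now dispatches VARCHAR to LazyHiveVarchar, which pulls
    // the max length back out of the inspector's type params (next file).
    System.out.println(LazyFactory.createLazyObject(oi).getClass().getSimpleName());
  }
}

End of aside.)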
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java
===================================================================
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+import java.nio.charset.CharacterCodingException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
+import org.apache.hadoop.io.Text;
+
+/**
+ * LazyObject for storing a value of HiveVarchar.
+ *
+ */
+public class LazyHiveVarchar extends
+    LazyPrimitive<LazyHiveVarcharObjectInspector, HiveVarcharWritable> {
+
+  private static final Log LOG = LogFactory.getLog(LazyHiveVarchar.class);
+
+  protected int maxLength = -1;
+
+  public LazyHiveVarchar(LazyHiveVarcharObjectInspector oi) {
+    super(oi);
+    VarcharTypeParams typeParams = (VarcharTypeParams)oi.getTypeParams();
+    if (typeParams == null) {
+      throw new RuntimeException("varchar type used without type params");
+    }
+    maxLength = typeParams.getLength();
+    data = new HiveVarcharWritable();
+  }
+
+  public LazyHiveVarchar(LazyHiveVarchar copy) {
+    super(copy);
+    this.maxLength = copy.maxLength;
+    data = new HiveVarcharWritable(copy.data);
+  }
+
+  public void setValue(LazyHiveVarchar copy) {
+    data.set(copy.data, maxLength);
+  }
+
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    String byteData = null;
+    try {
+      byteData = Text.decode(bytes.getData(), start, length);
+      // Decoded ok; store the value with the declared max length enforced.
+      data.set(byteData, maxLength);
+      isNull = false;
+    } catch (CharacterCodingException e) {
+      isNull = true;
+      LOG.debug("Data not in the HiveVarchar data type range so converted to null.", e);
+    }
+  }
+
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
@@ -31,14 +31,16 @@
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; @@ -225,6 +227,12 @@ break; } + case VARCHAR: { + HiveVarcharWritable hc = ((HiveVarcharObjectInspector)oi).getPrimitiveWritableObject(o); + ByteBuffer b = Text.encode(hc.toString()); + writeEscaped(out, b.array(), 0, b.limit(), escaped, escapeChar, needsEscape); + break; + } case BINARY: { BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o); byte[] toEncode = new byte[bw.getLength()]; Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveVarcharObjectInspector.java =================================================================== --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyHiveVarcharObjectInspector.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive; + + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.lazy.LazyHiveVarchar; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ParameterizedObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; + +public class LazyHiveVarcharObjectInspector + extends AbstractPrimitiveLazyObjectInspector + implements HiveVarcharObjectInspector, ParameterizedObjectInspector { + + private VarcharTypeParams typeParams; + + protected LazyHiveVarcharObjectInspector(PrimitiveTypeEntry typeEntry) { + super(typeEntry); + if (typeEntry.primitiveCategory != PrimitiveCategory.VARCHAR) { + throw new RuntimeException( + "TypeEntry of type varchar expected, got " + typeEntry.primitiveCategory); + } + } + + @Override + public Object copyObject(Object o) { + if (o == null) { + return null; + } + + LazyHiveVarchar ret = new LazyHiveVarchar(this); + ret.setValue((LazyHiveVarchar) o); + return ret; + } + + @Override + public HiveVarchar getPrimitiveJavaObject(Object o) { + if (o == null) { + return null; + } + + HiveVarchar ret = ((LazyHiveVarchar) o).getWritableObject().getHiveVarchar(); + if (!ParameterizedPrimitiveTypeUtils.doesPrimitiveMatchTypeParams(ret, typeParams)) { + HiveVarchar newValue = new HiveVarchar(ret, typeParams.length); + return newValue; + } + 
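+    // Value already matches the declared varchar type parameters; it can be
+    // returned as-is without conversion.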
return ret;
+  }
+
+  @Override
+  public void setTypeParams(BaseTypeParams newParams) {
+    typeParams = (VarcharTypeParams)newParams;
+  }
+
+  @Override
+  public BaseTypeParams getTypeParams() {
+    return typeParams;
+  }
+
+  public String toString() {
+    return getTypeName();
+  }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
@@ -21,7 +21,12 @@
 import java.util.ArrayList;
 import java.util.HashMap;
+import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ParameterizedObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams;
 /**
  * LazyPrimitiveObjectInspectorFactory is the primary way to create new
@@ -61,6 +66,8 @@
       new LazyBinaryObjectInspector();
   public static final LazyHiveDecimalObjectInspector LAZY_BIG_DECIMAL_OBJECT_INSPECTOR =
       new LazyHiveDecimalObjectInspector();
+  public static final LazyHiveVarcharObjectInspector LAZY_VARCHAR_OBJECT_INSPECTOR =
+      new LazyHiveVarcharObjectInspector(PrimitiveObjectInspectorUtils.varcharTypeEntry);
   static HashMap<ArrayList<Object>, LazyStringObjectInspector> cachedLazyStringObjectInspector =
       new HashMap<ArrayList<Object>, LazyStringObjectInspector>();
@@ -79,9 +86,37 @@
     return result;
   }
+  static PrimitiveObjectInspectorUtils.ParameterizedObjectInspectorMap
+    cachedParameterizedLazyObjectInspectors =
+      new PrimitiveObjectInspectorUtils.ParameterizedObjectInspectorMap();
+
+  public static ParameterizedObjectInspector getParameterizedObjectInspector(
+      PrimitiveCategory primitiveCategory,
+      BaseTypeParams typeParams) {
+    ParameterizedObjectInspector poi =
+        cachedParameterizedLazyObjectInspectors.getObjectInspector(primitiveCategory, typeParams);
+    if (poi == null) {
+      // Object inspector hasn't been cached for this type/params yet, create now
+      switch (primitiveCategory) {
+        case VARCHAR:
+          PrimitiveTypeEntry typeEntry = PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs(
+              primitiveCategory,
+              typeParams);
+          poi = new LazyHiveVarcharObjectInspector(typeEntry);
+          poi.setTypeParams(typeParams);
+          cachedParameterizedLazyObjectInspectors.setObjectInspector(poi);
+          break;
+
+        default:
+          throw new RuntimeException(
+              "Primitive type " + primitiveCategory + " should not take parameters");
+      }
+    }
+
+    return poi;
+  }
   public static AbstractPrimitiveLazyObjectInspector<?> getLazyObjectInspector(
       PrimitiveCategory primitiveCategory, boolean escaped, byte escapeChar) {
-
   switch (primitiveCategory) {
   case BOOLEAN:
     return LAZY_BOOLEAN_OBJECT_INSPECTOR;
@@ -99,6 +134,8 @@
     return LAZY_DOUBLE_OBJECT_INSPECTOR;
   case STRING:
     return getLazyStringObjectInspector(escaped, escapeChar);
+  case VARCHAR:
+    return LAZY_VARCHAR_OBJECT_INSPECTOR;
   case BINARY:
     return LAZY_BINARY_OBJECT_INSPECTOR;
   case VOID:
@@ -115,6 +152,24 @@
   }
 }
+  public static AbstractPrimitiveLazyObjectInspector<?> getLazyObjectInspector(
+      PrimitiveCategory primitiveCategory,
BaseTypeParams typeParams, boolean escaped, byte escapeChar) { + + if (typeParams == null) { + return getLazyObjectInspector(primitiveCategory, escaped, escapeChar); + } else { + switch(primitiveCategory) { + case VARCHAR: + LazyHiveVarcharObjectInspector oi = (LazyHiveVarcharObjectInspector) + getParameterizedObjectInspector(primitiveCategory, typeParams); + return oi; + + default: + throw new RuntimeException("Type " + primitiveCategory + " does not take parameters"); + } + } + } + private LazyPrimitiveObjectInspectorFactory() { // prevent instantiation } Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; @@ -71,6 +72,8 @@ return new LazyBinaryDouble((WritableDoubleObjectInspector) oi); case STRING: return new LazyBinaryString((WritableStringObjectInspector) oi); + case VARCHAR: + return new LazyBinaryHiveVarchar((WritableHiveVarcharObjectInspector) oi); case VOID: // for NULL return new LazyBinaryVoid((WritableVoidObjectInspector) oi); case DATE: Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveVarchar.java =================================================================== --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveVarchar.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
+import org.apache.hadoop.io.Text;
+
+public class LazyBinaryHiveVarchar extends
+    LazyBinaryPrimitive<WritableHiveVarcharObjectInspector, HiveVarcharWritable> {
+
+  protected int maxLength = -1;
+
+  LazyBinaryHiveVarchar(WritableHiveVarcharObjectInspector oi) {
+    super(oi);
+    // Check for params
+    VarcharTypeParams typeParams = (VarcharTypeParams)oi.getTypeParams();
+    if (typeParams == null) {
+      throw new RuntimeException("varchar type used without type params");
+    }
+    maxLength = typeParams.length;
+    data = new HiveVarcharWritable();
+  }
+
+  LazyBinaryHiveVarchar(LazyBinaryHiveVarchar copy) {
+    super(copy);
+    maxLength = copy.maxLength;
+    data = new HiveVarcharWritable(copy.data);
+  }
+
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    // re-use existing text member in varchar writable
+    Text textValue = data.getTextValue();
+    textValue.set(bytes.getData(), start, length);
+    data.enforceMaxLength(maxLength);
+  }
+
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.serde2.lazybinary;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
@@ -44,6 +46,8 @@
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
@@ -231,7 +235,7 @@
    * once already
    */
   private static boolean serializeStruct(Output byteStream, Object obj,
-      StructObjectInspector soi, boolean warnedOnceNullMapKey) {
+      StructObjectInspector soi, boolean warnedOnceNullMapKey) throws SerDeException {
     // do nothing for null struct
     if (null == obj) {
       return warnedOnceNullMapKey;
     }
@@ -284,7 +288,8 @@
    * once already
    */
   public static boolean serialize(Output byteStream, Object obj,
-      ObjectInspector objInspector, boolean skipLengthPrefix, boolean warnedOnceNullMapKey) {
+      ObjectInspector objInspector, boolean skipLengthPrefix, boolean warnedOnceNullMapKey)
+      throws SerDeException {
     // do nothing for null object
     if (null == obj) {
@@ -363,7 +368,24 @@
       byteStream.write(data, 0, length);
       return warnedOnceNullMapKey;
     }
-
+    case VARCHAR: {
+      HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) poi;
+      String value =
+          hcoi.getPrimitiveWritableObject(obj).getHiveVarchar().getValue();
+      try {
+        ByteBuffer bb = Text.encode(value);
+        // Write byte size. This must be the encoded byte count (bb.limit()),
+        // not value.length(): the two differ for non-ASCII data, and readers
+        // interpret this prefix as the byte size of the field.
+        if (!skipLengthPrefix) {
+          LazyBinaryUtils.writeVInt(byteStream, bb.limit());
+        }
+        // Write string value
+        byteStream.write(bb.array(), 0, bb.limit());
+      } catch (CharacterCodingException err) {
+        throw new SerDeException(err);
+      }
+      return warnedOnceNullMapKey;
+    }
     case BINARY: {
       BinaryObjectInspector baoi = (BinaryObjectInspector) poi;
       BytesWritable bw = baoi.getPrimitiveWritableObject(obj);
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
@@ -33,6 +33,8 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -190,6 +192,11 @@
       recordInfo.elementSize = vInt.value;
       break;
+    case VARCHAR:
+      LazyBinaryUtils.readVInt(bytes, offset, vInt);
+      recordInfo.elementOffset = vInt.length;
+      recordInfo.elementSize = vInt.value;
+      break;
     case BINARY:
       // using vint instead of 4 bytes
       LazyBinaryUtils.readVInt(bytes, offset, vInt);
@@ -388,7 +395,8 @@
     case PRIMITIVE: {
       result = PrimitiveObjectInspectorFactory
           .getPrimitiveWritableObjectInspector(((PrimitiveTypeInfo) typeInfo)
-          .getPrimitiveCategory());
+          .getPrimitiveCategory(),
+          ParameterizedPrimitiveTypeUtils.getTypeParamsFromTypeInfo(typeInfo));
       break;
     }
     case LIST: {
Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
@@ -25,6 +25,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveVarcharObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableHiveDecimalObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBooleanObjectInspector;
@@ -38,6 +39,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableTimestampObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils;
 /**
  * ObjectInspectorConverters.
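 *
 * A Converter copies a value from one object inspector's representation to
 * another's. Illustrative usage (inputOI, outputOI and input are placeholder
 * names, not identifiers from this patch):
 *   Converter c = ObjectInspectorConverters.getConverter(inputOI, outputOI);
 *   Object converted = c.convert(input);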
@@ -101,6 +103,10 @@ return new PrimitiveObjectInspectorConverter.StringConverter( inputOI); } + case VARCHAR: + return new PrimitiveObjectInspectorConverter.HiveVarcharConverter( + inputOI, + (SettableHiveVarcharObjectInspector) outputOI); case DATE: return new PrimitiveObjectInspectorConverter.DateConverter( inputOI, @@ -165,7 +171,8 @@ case PRIMITIVE: PrimitiveObjectInspector primInputOI = (PrimitiveObjectInspector) inputOI; return PrimitiveObjectInspectorFactory. - getPrimitiveWritableObjectInspector(primInputOI.getPrimitiveCategory()); + getPrimitiveWritableObjectInspector(primInputOI.getPrimitiveCategory(), + ParameterizedPrimitiveTypeUtils.getTypeParamsFromPrimitiveObjectInspector(primInputOI)); case STRUCT: return inputOI; case LIST: Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -31,6 +31,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; @@ -42,13 +43,16 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.shims.ShimLoader; import org.apache.hadoop.io.BytesWritable; @@ -86,8 +90,9 @@ if (oi.getCategory() == Category.PRIMITIVE) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; if (!(poi instanceof AbstractPrimitiveWritableObjectInspector)) { - return PrimitiveObjectInspectorFactory - .getPrimitiveWritableObjectInspector(poi.getPrimitiveCategory()); + return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + poi.getPrimitiveCategory(), + ParameterizedPrimitiveTypeUtils.getTypeParamsFromPrimitiveObjectInspector(poi)); } } return oi; @@ -108,25 +113,29 @@ switch (oi.getCategory()) { case PRIMITIVE: { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + BaseTypeParams typeParams = + ParameterizedPrimitiveTypeUtils.getTypeParamsFromPrimitiveObjectInspector(poi); 
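+      // typeParams (e.g. a declared varchar length) are passed to the factory
+      // in each case below so the declared type is preserved.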
switch (objectInspectorOption) { case DEFAULT: { if (poi.preferWritable()) { - result = PrimitiveObjectInspectorFactory - .getPrimitiveWritableObjectInspector(poi.getPrimitiveCategory()); + result = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + poi.getPrimitiveCategory(), + typeParams); } else { result = PrimitiveObjectInspectorFactory - .getPrimitiveJavaObjectInspector(poi.getPrimitiveCategory()); + .getPrimitiveJavaObjectInspector(poi.getPrimitiveCategory(), typeParams); } break; } case JAVA: { result = PrimitiveObjectInspectorFactory - .getPrimitiveJavaObjectInspector(poi.getPrimitiveCategory()); + .getPrimitiveJavaObjectInspector(poi.getPrimitiveCategory(), typeParams); break; } case WRITABLE: { - result = PrimitiveObjectInspectorFactory - .getPrimitiveWritableObjectInspector(poi.getPrimitiveCategory()); + result = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + poi.getPrimitiveCategory(), + typeParams); break; } } @@ -487,6 +496,8 @@ } return r; } + case VARCHAR: + return ((HiveVarcharObjectInspector)poi).getPrimitiveWritableObject(o).hashCode(); case BINARY: return ((BinaryObjectInspector) poi).getPrimitiveWritableObject(o).hashCode(); @@ -682,6 +693,11 @@ .compareTo(s2)); } } + case VARCHAR: { + HiveVarcharWritable t1 = ((HiveVarcharObjectInspector)poi1).getPrimitiveWritableObject(o1); + HiveVarcharWritable t2 = ((HiveVarcharObjectInspector)poi2).getPrimitiveWritableObject(o2); + return t1.compareTo(t2); + } case BINARY: { BytesWritable bw1 = ((BinaryObjectInspector) poi1).getPrimitiveWritableObject(o1); BytesWritable bw2 = ((BinaryObjectInspector) poi2).getPrimitiveWritableObject(o2); Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java @@ -28,7 +28,7 @@ */ public static enum PrimitiveCategory { VOID, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, - DATE, TIMESTAMP, BINARY, DECIMAL, UNKNOWN + DATE, TIMESTAMP, BINARY, DECIMAL, VARCHAR, UNKNOWN }; /** Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/AbstractPrimitiveObjectInspector.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/AbstractPrimitiveObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/AbstractPrimitiveObjectInspector.java @@ -76,7 +76,7 @@ */ @Override public String getTypeName() { - return typeEntry.typeName; + return typeEntry.toString(); } } Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/HiveVarcharObjectInspector.java =================================================================== --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/HiveVarcharObjectInspector.java @@ -0,0 +1,28 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; + +public interface HiveVarcharObjectInspector extends PrimitiveObjectInspector { + HiveVarcharWritable getPrimitiveWritableObject(Object o); + + HiveVarchar getPrimitiveJavaObject(Object o); +} Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaHiveVarcharObjectInspector.java =================================================================== --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaHiveVarcharObjectInspector.java @@ -0,0 +1,107 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; + +public class JavaHiveVarcharObjectInspector + extends AbstractPrimitiveJavaObjectInspector + implements SettableHiveVarcharObjectInspector, ParameterizedObjectInspector { + + private VarcharTypeParams typeParams; + + protected JavaHiveVarcharObjectInspector() { + super(PrimitiveObjectInspectorUtils.varcharTypeEntry); + } + + public HiveVarchar getPrimitiveJavaObject(Object o) { + if (o == null) { + return null; + } + HiveVarchar value = (HiveVarchar)o; + if (ParameterizedPrimitiveTypeUtils.doesPrimitiveMatchTypeParams(value, typeParams)) { + return value; + } + // value needs to be converted to match the type params (length, etc). 
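+    // For example, under varchar(5) type params, a HiveVarchar holding
+    // "hello world" is converted to a new HiveVarchar holding "hello".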
+ return getPrimitiveWithParams(value); + } + + @Override + public HiveVarcharWritable getPrimitiveWritableObject(Object o) { + if (o == null) { + return null; + } + return getWritableWithParams((HiveVarchar)o); + } + + private HiveVarchar getPrimitiveWithParams(HiveVarchar val) { + HiveVarchar hv = new HiveVarchar(val, getMaxLength()); + return hv; + } + + private HiveVarcharWritable getWritableWithParams(HiveVarchar val) { + HiveVarcharWritable newValue = new HiveVarcharWritable(); + newValue.set(val, getMaxLength()); + return newValue; + } + + @Override + public Object set(Object o, HiveVarchar value) { + HiveVarchar setValue = (HiveVarchar)o; + if (ParameterizedPrimitiveTypeUtils.doesPrimitiveMatchTypeParams(value, typeParams)) { + setValue.setValue(value); + } else { + // Otherwise value may be too long, convert to appropriate value based on params + setValue.setValue(value, getMaxLength()); + } + + return setValue; + } + + @Override + public Object set(Object o, String value) { + HiveVarchar convertedValue = (HiveVarchar)o; + convertedValue.setValue(value, getMaxLength()); + return convertedValue; + } + + @Override + public Object create(HiveVarchar value) { + HiveVarchar hc = new HiveVarchar(value, getMaxLength()); + return hc; + } + + @Override + public void setTypeParams(BaseTypeParams newParams) { + typeParams = (VarcharTypeParams)newParams; + } + + @Override + public BaseTypeParams getTypeParams() { + return typeParams; + } + + public int getMaxLength() { + return typeParams != null ? typeParams.length : -1; + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/ParameterizedObjectInspector.java =================================================================== --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/ParameterizedObjectInspector.java @@ -0,0 +1,29 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.serde2.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams;
+
+public interface ParameterizedObjectInspector extends PrimitiveObjectInspector {
+
+  void setTypeParams(BaseTypeParams newParams);
+
+  BaseTypeParams getTypeParams();
+
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
@@ -21,12 +21,15 @@
 import java.sql.Date;
 import java.sql.Timestamp;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
 import org.apache.hadoop.hive.serde2.lazy.LazyLong;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams;
 import org.apache.hadoop.io.Text;
 /**
@@ -390,6 +393,14 @@
       t.set(((StringObjectInspector) inputOI).getPrimitiveJavaObject(input));
       }
       return t;
+    case VARCHAR:
+      // TODO: space padded value or stripped value when converting to string?
+      if (inputOI.preferWritable()) {
+        t.set(((HiveVarcharObjectInspector) inputOI).getPrimitiveWritableObject(input).toString());
+      } else {
+        t.set(((HiveVarcharObjectInspector) inputOI).getPrimitiveJavaObject(input).toString());
+      }
+      return t;
     case DATE:
       t.set(((DateObjectInspector) inputOI).getPrimitiveWritableObject(input).toString());
       return t;
@@ -426,4 +437,30 @@
     }
   }
+
+  public static class HiveVarcharConverter implements Converter {
+
+    PrimitiveObjectInspector inputOI;
+    SettableHiveVarcharObjectInspector outputOI;
+    HiveVarcharWritable hc;
+
+    public HiveVarcharConverter(PrimitiveObjectInspector inputOI, SettableHiveVarcharObjectInspector outputOI) {
+      this.inputOI = inputOI;
+      this.outputOI = outputOI;
+      VarcharTypeParams typeParams = null;
+      if (outputOI instanceof ParameterizedObjectInspector) {
+        typeParams = (VarcharTypeParams)((ParameterizedObjectInspector)outputOI).getTypeParams();
+      }
+      // typeParams may legitimately be null here (varchar used without explicit
+      // length parameters); any length enforcement happens in outputOI.set(),
+      // so no error is raised for that case.
+      hc = new HiveVarcharWritable();
+    }
+
+    @Override
+    public Object convert(Object input) {
+      return outputOI.set(hc, PrimitiveObjectInspectorUtils.getHiveVarchar(input, inputOI));
+    }
+
+  }
 }
Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
@@ -20,6 +20,7 @@
 import java.util.HashMap;
+import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import
org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -29,7 +30,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.ParameterizedObjectInspectorMap; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -65,6 +68,8 @@ new JavaDoubleObjectInspector(); public static final JavaStringObjectInspector javaStringObjectInspector = new JavaStringObjectInspector(); + public static final JavaHiveVarcharObjectInspector javaHiveVarcharObjectInspector = + new JavaHiveVarcharObjectInspector(); public static final JavaVoidObjectInspector javaVoidObjectInspector = new JavaVoidObjectInspector(); public static final JavaDateObjectInspector javaDateObjectInspector = @@ -92,6 +97,8 @@ new WritableDoubleObjectInspector(); public static final WritableStringObjectInspector writableStringObjectInspector = new WritableStringObjectInspector(); + public static final WritableHiveVarcharObjectInspector writableHiveVarcharObjectInspector = + new WritableHiveVarcharObjectInspector(PrimitiveObjectInspectorUtils.varcharTypeEntry); public static final WritableVoidObjectInspector writableVoidObjectInspector = new WritableVoidObjectInspector(); public static final WritableDateObjectInspector writableDateObjectInspector = @@ -122,6 +129,8 @@ writableDoubleObjectInspector); cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.STRING, writableStringObjectInspector); + cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.VARCHAR, + writableHiveVarcharObjectInspector); cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.VOID, writableVoidObjectInspector); cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.DATE, @@ -153,6 +162,8 @@ javaDoubleObjectInspector); cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.STRING, javaStringObjectInspector); + cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.VARCHAR, + javaHiveVarcharObjectInspector); cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.VOID, javaVoidObjectInspector); cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.DATE, @@ -166,6 +177,20 @@ } /** + * Cached Writable object inspectors for parameterized primitive types. + */ + private static ParameterizedObjectInspectorMap + cachedParameterizedPrimitiveWritableObjectInspectorCache = + new ParameterizedObjectInspectorMap(); + + /** + * Cached Java object inspectors for parameterized primitive types. + */ + private static ParameterizedObjectInspectorMap + cachedParameterizedPrimitiveJavaObjectInspectorCache = + new ParameterizedObjectInspectorMap(); + + /** * Returns the PrimitiveWritableObjectInspector for the PrimitiveCategory. * * @param primitiveCategory @@ -182,6 +207,53 @@ } /** + * Returns the PrimitiveWritableObjectInspector for the PrimitiveCategory, with option to + * pass in parameters for the primitive type (such as char(10)). 
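+   * For example, a varchar(10) writable inspector could be obtained with:
+   *   VarcharTypeParams params = new VarcharTypeParams();
+   *   params.set(new String[] { "10" });
+   *   getPrimitiveWritableObjectInspector(PrimitiveCategory.VARCHAR, params);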
+ * Ideally this method should be used over the method without type parameters, + * and the type parameters (or lack of parameters) can be determined from + * the input ObjectInspector, TypeInfo, or TypeEntry. + * However there are situations where it is not possible to get any information about + * type parameters, such as when getting an object inspector based on reflection from + * the java or primitive class. + * @param primitiveCategory Primitve type category + * @param primitiveTypeParams Type parameters for the primitve type. + * Set to null if there are no type parameters + * @return + */ + public static AbstractPrimitiveWritableObjectInspector getPrimitiveWritableObjectInspector( + PrimitiveCategory primitiveCategory, + BaseTypeParams primitiveTypeParams) { + if (primitiveTypeParams == null) { + // No type params, just search the unparameterized types + return getPrimitiveWritableObjectInspector(primitiveCategory); + } else { + // Check our cached set of parameterized object inspectors for the primitive category, + // or create a new object inspector if one doesn't exist yet. + ParameterizedObjectInspector oi = + cachedParameterizedPrimitiveWritableObjectInspectorCache.getObjectInspector( + primitiveCategory, + primitiveTypeParams); + if (oi == null) { + // Do a bit of validation - not all primitive types use parameters. + switch (primitiveCategory) { + case VARCHAR: + PrimitiveTypeEntry typeEntry = PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs( + primitiveCategory, + primitiveTypeParams); + oi = new WritableHiveVarcharObjectInspector(typeEntry); + oi.setTypeParams(primitiveTypeParams); + cachedParameterizedPrimitiveWritableObjectInspectorCache.setObjectInspector(oi); + break; + default: + throw new RuntimeException( + "Primitve type " + primitiveCategory + " should not take parameters"); + } + } + return (AbstractPrimitiveWritableObjectInspector)oi; + } + } + + /** * Returns a PrimitiveWritableObjectInspector which implements ConstantObjectInspector * for the PrimitiveCategory. * @@ -207,6 +279,8 @@ return new WritableConstantDoubleObjectInspector((DoubleWritable)value); case STRING: return new WritableConstantStringObjectInspector((Text)value); + case VARCHAR: + return new WritableConstantHiveVarcharObjectInspector((HiveVarcharWritable)value); case DATE: return new WritableConstantDateObjectInspector((DateWritable)value); case TIMESTAMP: @@ -240,6 +314,49 @@ } /** + * Returns the PrimitiveJavaObjectInspector for the PrimitiveCategory, with option to + * pass in parameters for the primitive type (such as char(10)). + * Ideally this method should be used over the method without type parameters, + * and the type parameters (or lack of parameters) can be determined from + * the input ObjectInspector, TypeInfo, or TypeEntry. + * However there are situations where it is not possible to get any information about + * type parameters, such as when getting an object inspector based on reflection from + * the java or primitive class. + * @param primitiveCategory Primitve type category + * @param primitiveTypeParams Type parameters for the primitve type. 
+ * Set to null if there are no type parameters + * @return + */ + public static AbstractPrimitiveJavaObjectInspector getPrimitiveJavaObjectInspector( + PrimitiveCategory primitiveCategory, + BaseTypeParams primitiveTypeParams) { + if (primitiveTypeParams == null) { + // No type params, just search the unparameterized types + return getPrimitiveJavaObjectInspector(primitiveCategory); + } else { + // Check our cached set of parameterized object inspectors for the primitive category, + // or create a new object inspector if one doesn't exist yet. + ParameterizedObjectInspector oi = + cachedParameterizedPrimitiveJavaObjectInspectorCache.getObjectInspector( + primitiveCategory, + primitiveTypeParams); + if (oi == null) { + // Do a bit of validation - not all primitive types use parameters. + switch (primitiveCategory) { + case VARCHAR: + oi = new JavaHiveVarcharObjectInspector(); + oi.setTypeParams(primitiveTypeParams); + cachedParameterizedPrimitiveJavaObjectInspectorCache.setObjectInspector(oi); + break; + default: + throw new RuntimeException( + "Primitve type " + primitiveCategory + " should not take parameters"); + } + } + return (AbstractPrimitiveJavaObjectInspector)oi; + } + } + /** * Returns an ObjectInspector for a primitive Class. The Class can be a Hive * Writable class, or a Java Primitive Class. * Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java @@ -26,11 +26,16 @@ import java.util.HashMap; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; @@ -40,6 +45,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -58,11 +65,12 @@ * ObjectInspector to return to the caller of SerDe2.getObjectInspector(). */ public final class PrimitiveObjectInspectorUtils { + private static Log LOG = LogFactory.getLog(PrimitiveObjectInspectorUtils.class); /** * TypeEntry stores information about a Hive Primitive TypeInfo. */ - public static class PrimitiveTypeEntry implements Writable { + public static class PrimitiveTypeEntry implements Writable, Cloneable { /** * The category of the PrimitiveType. 
@@ -87,39 +95,125 @@
    * typeName is the name of the type as in DDL.
    */
   public String typeName;
+  public Class<?> typeParamsClass;
+  public BaseTypeParams typeParams;
   PrimitiveTypeEntry(
       PrimitiveObjectInspector.PrimitiveCategory primitiveCategory,
       String typeName, Class<?> primitiveType, Class<?> javaClass,
-      Class<?> hiveClass) {
+      Class<?> hiveClass, Class<?> paramsClass) {
     this.primitiveCategory = primitiveCategory;
     primitiveJavaType = primitiveType;
     primitiveJavaClass = javaClass;
     primitiveWritableClass = hiveClass;
+    typeParamsClass = paramsClass;
     this.typeName = typeName;
   }
   @Override
   public void readFields(DataInput in) throws IOException {
     primitiveCategory = WritableUtils.readEnum(in,
         PrimitiveObjectInspector.PrimitiveCategory.class);
     typeName = WritableUtils.readString(in);
+    int typeParamsIndicator = WritableUtils.readVInt(in);
     try {
       primitiveJavaType = Class.forName(WritableUtils.readString(in));
       primitiveJavaClass = Class.forName(WritableUtils.readString(in));
       primitiveWritableClass = Class.forName(WritableUtils.readString(in));
+      if (typeParamsIndicator == 1) {
+        typeParamsClass = Class.forName(WritableUtils.readString(in));
+        typeParams = (BaseTypeParams)typeParamsClass.newInstance();
+        typeParams.readFields(in);
+      } else {
+        typeParamsClass = null;
+        typeParams = null;
+      }
     } catch (ClassNotFoundException e) {
       throw new IOException(e);
+    } catch (IllegalAccessException e) {
+      throw new IOException(e);
+    } catch (InstantiationException e) {
+      throw new IOException(e);
     }
   }
   @Override
   public void write(DataOutput out) throws IOException {
+    int typeParamsIndicator = (isParameterized() && typeParams != null) ? 1 : 0;
+
     WritableUtils.writeEnum(out, primitiveCategory);
     WritableUtils.writeString(out, typeName);
+    WritableUtils.writeVInt(out, typeParamsIndicator);
     WritableUtils.writeString(out, primitiveJavaType.getName());
     WritableUtils.writeString(out, primitiveJavaClass.getName());
     WritableUtils.writeString(out, primitiveWritableClass.getName());
+    if (typeParamsIndicator == 1) {
+      WritableUtils.writeString(out, typeParamsClass.getName());
+      typeParams.write(out);
+    }
+  }
+
+  public PrimitiveTypeEntry addParameters(String[] parameters) {
+    if (parameters == null || parameters.length == 0) {
+      return this;
+    }
+
+    PrimitiveTypeEntry result;
+    try {
+      BaseTypeParams newTypeParams = (BaseTypeParams)typeParamsClass.newInstance();
+      newTypeParams.set(parameters);
+      String typeNameWithParams = this.typeName + newTypeParams.toString();
+      if (typeNameToTypeEntry.containsKey(typeNameWithParams)) {
+        return typeNameToTypeEntry.get(typeNameWithParams);
+      }
+      result = (PrimitiveTypeEntry)this.clone();
+      result.typeParams = newTypeParams;
+
+      PrimitiveObjectInspectorUtils.addParameterizedType(result);
+
+      return result;
+    } catch (Exception err) {
+      LOG.error("Error while setting type parameters: " + err);
+      return null;
+    }
+  }
+
+  public boolean isParameterized() {
+    return (null != typeParamsClass);
+  }
+
+  public Object clone() {
+    PrimitiveTypeEntry result = new PrimitiveTypeEntry(
+        this.primitiveCategory,
+        this.typeName,
+        this.primitiveJavaType,
+        this.primitiveJavaClass,
+        this.primitiveWritableClass,
+        this.typeParamsClass);
+    return result;
+  }
+
+  public String toString() {
+    if (typeParams != null) {
+      return typeName + typeParams.toString();
+    }
+    return typeName;
+  }
+
+  public static BaseTypeParams createTypeParams(String typeName, String[] parameters)
+      throws SerDeException {
+    try {
+      PrimitiveTypeEntry typeEntry = getTypeEntryFromTypeName(typeName);
+      if (typeEntry != null &&
typeEntry.typeParamsClass != null) { + BaseTypeParams newTypeParams = (BaseTypeParams)typeEntry.typeParamsClass.newInstance(); + newTypeParams.set(parameters); + return newTypeParams; + } else { + return null; + } + } catch (Exception err) { + throw new SerDeException("Error creating type params for " + typeName, err); + } } } @@ -129,6 +223,10 @@ static final Map, PrimitiveTypeEntry> primitiveWritableClassToTypeEntry = new HashMap, PrimitiveTypeEntry>(); static final Map typeNameToTypeEntry = new HashMap(); + static void addParameterizedType(PrimitiveTypeEntry t) { + typeNameToTypeEntry.put(t.toString(), t); + } + static void registerType(PrimitiveTypeEntry t) { if (t.primitiveCategory != PrimitiveCategory.UNKNOWN) { primitiveCategoryToTypeEntry.put(t.primitiveCategory, t); @@ -149,53 +247,57 @@ public static final PrimitiveTypeEntry binaryTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.BINARY, serdeConstants.BINARY_TYPE_NAME, byte[].class, - byte[].class, BytesWritable.class); + byte[].class, BytesWritable.class, null); public static final PrimitiveTypeEntry stringTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.STRING, serdeConstants.STRING_TYPE_NAME, null, String.class, - Text.class); + Text.class, null); public static final PrimitiveTypeEntry booleanTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.BOOLEAN, serdeConstants.BOOLEAN_TYPE_NAME, Boolean.TYPE, - Boolean.class, BooleanWritable.class); + Boolean.class, BooleanWritable.class, null); public static final PrimitiveTypeEntry intTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.INT, serdeConstants.INT_TYPE_NAME, Integer.TYPE, - Integer.class, IntWritable.class); + Integer.class, IntWritable.class, null); public static final PrimitiveTypeEntry longTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.LONG, serdeConstants.BIGINT_TYPE_NAME, Long.TYPE, - Long.class, LongWritable.class); + Long.class, LongWritable.class, null); public static final PrimitiveTypeEntry floatTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.FLOAT, serdeConstants.FLOAT_TYPE_NAME, Float.TYPE, - Float.class, FloatWritable.class); + Float.class, FloatWritable.class, null); public static final PrimitiveTypeEntry voidTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.VOID, serdeConstants.VOID_TYPE_NAME, Void.TYPE, Void.class, - NullWritable.class); + NullWritable.class, null); // No corresponding Writable classes for the following 3 in hadoop 0.17.0 public static final PrimitiveTypeEntry doubleTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.DOUBLE, serdeConstants.DOUBLE_TYPE_NAME, Double.TYPE, - Double.class, DoubleWritable.class); + Double.class, DoubleWritable.class, null); public static final PrimitiveTypeEntry byteTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.BYTE, serdeConstants.TINYINT_TYPE_NAME, Byte.TYPE, - Byte.class, ByteWritable.class); + Byte.class, ByteWritable.class, null); public static final PrimitiveTypeEntry shortTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.SHORT, serdeConstants.SMALLINT_TYPE_NAME, Short.TYPE, - Short.class, ShortWritable.class); + Short.class, ShortWritable.class, null); public static final PrimitiveTypeEntry dateTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.DATE, serdeConstants.DATE_TYPE_NAME, null, - Date.class, DateWritable.class); + Date.class, DateWritable.class, null); public static final PrimitiveTypeEntry timestampTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.TIMESTAMP, serdeConstants.TIMESTAMP_TYPE_NAME, null, - Timestamp.class, TimestampWritable.class); 
+ Timestamp.class, TimestampWritable.class, null); public static final PrimitiveTypeEntry decimalTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.DECIMAL, serdeConstants.DECIMAL_TYPE_NAME, null, - HiveDecimal.class, HiveDecimalWritable.class); + HiveDecimal.class, HiveDecimalWritable.class, null); + public static final PrimitiveTypeEntry varcharTypeEntry = new PrimitiveTypeEntry( + PrimitiveCategory.VARCHAR, serdeConstants.VARCHAR_TYPE_NAME, null, HiveVarchar.class, + HiveVarcharWritable.class, VarcharTypeParams.class); // The following is a complex type for special handling public static final PrimitiveTypeEntry unknownTypeEntry = new PrimitiveTypeEntry( - PrimitiveCategory.UNKNOWN, "unknown", null, Object.class, null); + PrimitiveCategory.UNKNOWN, "unknown", null, Object.class, null, null); static { registerType(binaryTypeEntry); registerType(stringTypeEntry); + registerType(varcharTypeEntry); registerType(booleanTypeEntry); registerType(intTypeEntry); registerType(longTypeEntry); @@ -317,6 +419,23 @@ return typeNameToTypeEntry.get(typeName); } + public static PrimitiveTypeEntry getTypeEntryFromTypeSpecs( + PrimitiveCategory primitiveCategory, + BaseTypeParams typeParams) { + String typeString = primitiveCategory.toString().toLowerCase(); + if (typeParams != null) { + typeString += typeParams.toString(); + } + PrimitiveTypeEntry typeEntry = getTypeEntryFromTypeName(typeString); + if (typeEntry == null) { + // Parameterized type doesn't exist yet, create now. + typeEntry = (PrimitiveTypeEntry)getTypeEntryFromTypeSpecs(primitiveCategory, null).clone(); + typeEntry.typeParams = typeParams; + addParameterizedType(typeEntry); + } + return typeEntry; + } + /** * Compare 2 primitive objects. Conversion not allowed. Note that NULL does * not equal to NULL according to SQL standard. 
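 * For example, an int 1 and a long 1L are not considered equal by this
 * comparison, since that would require a conversion.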
@@ -366,6 +485,10 @@ .getPrimitiveWritableObject(o2); return t1.equals(t2); } + case VARCHAR: { + return ((HiveVarcharObjectInspector)oi1).getPrimitiveWritableObject(o1) + .equals(((HiveVarcharObjectInspector)oi2).getPrimitiveWritableObject(o2)); + } case DATE: { return ((DateObjectInspector) oi1).getPrimitiveWritableObject(o1) .equals(((DateObjectInspector) oi2).getPrimitiveWritableObject(o2)); @@ -569,6 +692,10 @@ } break; } + case VARCHAR: { + result = Integer.parseInt(getString(o, oi)); + break; + } case TIMESTAMP: result = (int) (((TimestampObjectInspector) oi) .getPrimitiveWritableObject(o).getSeconds()); @@ -628,6 +755,10 @@ result = Long.parseLong(s); } break; + case VARCHAR: { + result = Long.parseLong(getString(o, oi)); + break; + } case TIMESTAMP: result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o) .getSeconds(); @@ -681,6 +812,9 @@ String s = soi.getPrimitiveJavaObject(o); result = Double.parseDouble(s); break; + case VARCHAR: + result = Double.parseDouble(getString(o, oi)); + break; case TIMESTAMP: result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o).getDouble(); break; @@ -746,6 +880,10 @@ StringObjectInspector soi = (StringObjectInspector) oi; result = soi.getPrimitiveJavaObject(o); break; + case VARCHAR: + HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) oi; + result = hcoi.getPrimitiveJavaObject(o).toString(); + break; case DATE: result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o).toString(); break; @@ -763,6 +901,28 @@ return result; } + public static HiveVarchar getHiveVarchar(Object o, PrimitiveObjectInspector oi) { + + if (o == null) { + return null; + } + + HiveVarchar result = null; + switch (oi.getPrimitiveCategory()) { + case VARCHAR: + result = ((HiveVarcharObjectInspector)oi).getPrimitiveJavaObject(o); + break; + default: + // Is there a way to provide char length here? + // It might actually be ok as long as there is an object inspector (with char length) + // receiving this value. 
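+    // For example, an integer 123 read through an int inspector becomes the
+    // varchar value "123" here, with no length limit applied at this point.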
+ result = new HiveVarchar(); + result.setValue(getString(o, oi)); + break; + } + return result; + } + public static BytesWritable getBinary(Object o, PrimitiveObjectInspector oi) { if (null == o) { @@ -826,6 +986,9 @@ case STRING: result = new HiveDecimal(((StringObjectInspector) oi).getPrimitiveJavaObject(o)); break; + case VARCHAR: + result = new HiveDecimal(getString(o, oi)); + break; case TIMESTAMP: Double ts = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o) .getDouble(); @@ -861,6 +1024,15 @@ result = null; } break; + case VARCHAR: { + try { + String val = getString(o, oi).trim(); + result = Date.valueOf(val); + } catch (IllegalArgumentException e) { + result = null; + } + break; + } case DATE: result = ((DateObjectInspector) oi).getPrimitiveWritableObject(o).get(); break; @@ -913,20 +1085,11 @@ break; case STRING: StringObjectInspector soi = (StringObjectInspector) oi; - String s = soi.getPrimitiveJavaObject(o).trim(); - - // Throw away extra if more than 9 decimal places - int periodIdx = s.indexOf("."); - if (periodIdx != -1) { - if (s.length() - periodIdx > 9) { - s = s.substring(0, periodIdx + 10); - } - } - try { - result = Timestamp.valueOf(s); - } catch (IllegalArgumentException e) { - result = null; - } + String s = soi.getPrimitiveJavaObject(o); + result = getTimestampFromString(s); + break; + case VARCHAR: + result = getTimestampFromString(getString(o, oi)); break; case DATE: result = new Timestamp( @@ -942,6 +1105,25 @@ return result; } + static Timestamp getTimestampFromString(String s) { + Timestamp result; + s = s.trim(); + + // Throw away extra if more than 9 decimal places + int periodIdx = s.indexOf("."); + if (periodIdx != -1) { + if (s.length() - periodIdx > 9) { + s = s.substring(0, periodIdx + 10); + } + } + try { + result = Timestamp.valueOf(s); + } catch (IllegalArgumentException e) { + result = null; + } + return result; + } + public static Class getJavaPrimitiveClassFromObjectInspector(ObjectInspector oi) { if (oi.getCategory() != Category.PRIMITIVE) { return null; @@ -952,8 +1134,72 @@ return t == null ? null : t.primitiveJavaClass; } + /** + * Provide a general grouping for each primitive data type. + */ + public static enum PrimitiveGrouping { + NUMERIC_GROUP, STRING_GROUP, BOOLEAN_GROUP, DATE_GROUP, BINARY_GROUP, UNKNOWN_GROUP + }; + + public static PrimitiveGrouping getPrimitiveGrouping(PrimitiveCategory primitiveCategory) { + switch (primitiveCategory) { + case BYTE: + case SHORT: + case INT: + case LONG: + case FLOAT: + case DOUBLE: + case DECIMAL: + return PrimitiveGrouping.NUMERIC_GROUP; + case STRING: + case VARCHAR: + return PrimitiveGrouping.STRING_GROUP; + case BOOLEAN: + return PrimitiveGrouping.BOOLEAN_GROUP; + case TIMESTAMP: + case DATE: + return PrimitiveGrouping.DATE_GROUP; + case BINARY: + return PrimitiveGrouping.BINARY_GROUP; + default: + return PrimitiveGrouping.UNKNOWN_GROUP; + } + } + private PrimitiveObjectInspectorUtils() { // prevent instantiation } + /** + * Helper class to store parameterized primitive object inspectors, which can be + * used by the various object inspector factory methods. 
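+   * Entries are keyed by primitive category, then by the string form of the
+   * type parameters. Illustrative usage ("map" and "oi" are placeholder names):
+   *   map.setObjectInspector(oi);
+   *   ParameterizedObjectInspector cached =
+   *       map.getObjectInspector(oi.getPrimitiveCategory(), oi.getTypeParams());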
+ */ + public static class ParameterizedObjectInspectorMap { + HashMap<PrimitiveCategory, HashMap<String, ParameterizedObjectInspector>> entries; + + public ParameterizedObjectInspectorMap() { + entries = + new HashMap<PrimitiveCategory, HashMap<String, ParameterizedObjectInspector>>(); + } + + public ParameterizedObjectInspector getObjectInspector( + PrimitiveCategory category, + BaseTypeParams params) { + HashMap<String, ParameterizedObjectInspector> entriesForCategory = entries.get(category); + if (entriesForCategory == null) { + return null; + } + return entriesForCategory.get(params.toString()); + } + + public void setObjectInspector(ParameterizedObjectInspector oi) { + PrimitiveCategory category = oi.getPrimitiveCategory(); + HashMap<String, ParameterizedObjectInspector> entriesForCategory = entries.get(category); + if (entriesForCategory == null) { + entriesForCategory = new HashMap<String, ParameterizedObjectInspector>(); + entries.put(category, entriesForCategory); + } + entriesForCategory.put(oi.getTypeParams().toString(), oi); + } + } }
Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/SettableHiveVarcharObjectInspector.java
===================================================================
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/SettableHiveVarcharObjectInspector.java
@@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; + +public interface SettableHiveVarcharObjectInspector extends HiveVarcharObjectInspector { + Object set(Object o, HiveVarchar value); + + Object set(Object o, String value); + + Object create(HiveVarchar value); + +}
Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantHiveVarcharObjectInspector.java
===================================================================
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantHiveVarcharObjectInspector.java
@@ -0,0 +1,51 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; + +/** + * A WritableConstantHiveVarcharObjectInspector is a WritableHiveVarcharObjectInspector + * that implements ConstantObjectInspector. + */ +public class WritableConstantHiveVarcharObjectInspector extends + WritableHiveVarcharObjectInspector implements + ConstantObjectInspector, ParameterizedObjectInspector { + + protected HiveVarcharWritable value; + + WritableConstantHiveVarcharObjectInspector(HiveVarcharWritable value) { + super(PrimitiveObjectInspectorUtils.varcharTypeEntry); + this.value = value; + + // Determine character length and update type params/typeinfo accordingly. + VarcharTypeParams typeParams = new VarcharTypeParams(); + typeParams.length = this.value.getCharacterLength(); + setTypeParams(typeParams); + this.typeEntry = PrimitiveObjectInspectorUtils.getTypeEntryFromTypeSpecs( + PrimitiveCategory.VARCHAR, + typeParams); + } + + @Override + public HiveVarcharWritable getWritableConstantValue() { + return value; + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java =================================================================== --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.objectinspector.primitive; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; + +public class WritableHiveVarcharObjectInspector + extends AbstractPrimitiveWritableObjectInspector + implements SettableHiveVarcharObjectInspector, ParameterizedObjectInspector { + + private static final Log LOG = LogFactory.getLog(WritableHiveVarcharObjectInspector.class); + + protected VarcharTypeParams typeParams; + + protected WritableHiveVarcharObjectInspector(PrimitiveTypeEntry typeEntry) { + super(typeEntry); + if (typeEntry.primitiveCategory != PrimitiveCategory.VARCHAR) { + throw new RuntimeException( + "TypeEntry of type varchar expected, got " + typeEntry.primitiveCategory); + } + } + + @Override + public HiveVarchar getPrimitiveJavaObject(Object o) { + // check input object's length, if it doesn't match + // then output a new primitive with the correct params. + if (o == null) { + return null; + } + HiveVarcharWritable writable = ((HiveVarcharWritable)o); + if (doesWritableMatchTypeParams(writable)) { + return writable.getHiveVarchar(); + } + return getPrimitiveWithParams(writable); + } + + public HiveVarcharWritable getPrimitiveWritableObject(Object o) { + // check input object's length, if it doesn't match + // then output new writable with correct params. 
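+    // This mirrors getPrimitiveJavaObject() above, but stays in the writable
+    // representation so the input can be returned as-is when the length matches.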
+ if (o == null) { + return null; + } + HiveVarcharWritable writable = (HiveVarcharWritable) o; + if (doesWritableMatchTypeParams(writable)) { + return writable; + } + + return getWritableWithParams(writable); + } + + private HiveVarchar getPrimitiveWithParams(HiveVarcharWritable val) { + HiveVarchar hv = new HiveVarchar(); + hv.setValue(val.getHiveVarchar(), getMaxLength()); + return hv; + } + + private HiveVarcharWritable getWritableWithParams(HiveVarcharWritable val) { + HiveVarcharWritable newValue = new HiveVarcharWritable(); + newValue.set(val, getMaxLength()); + return newValue; + } + + private boolean doesWritableMatchTypeParams(HiveVarcharWritable writable) { + return ParameterizedPrimitiveTypeUtils.doesWritableMatchTypeParams(writable, typeParams); + } + + private boolean doesPrimitiveMatchTypeParams(HiveVarchar value) { + return ParameterizedPrimitiveTypeUtils.doesPrimitiveMatchTypeParams(value, typeParams); + } + + @Override + public Object copyObject(Object o) { + if (o == null) { + return null; + } + HiveVarcharWritable writable = (HiveVarcharWritable) o; + if (doesWritableMatchTypeParams(writable)) { + return new HiveVarcharWritable(writable); + } + return getWritableWithParams(writable); + } + + @Override + public Object set(Object o, HiveVarchar value) { + HiveVarcharWritable writable = (HiveVarcharWritable) o; + writable.set(value, getMaxLength()); + return o; + } + + @Override + public Object set(Object o, String value) { + HiveVarcharWritable writable = (HiveVarcharWritable) o; + writable.set(value, getMaxLength()); + return o; + } + + @Override + public Object create(HiveVarchar value) { + HiveVarcharWritable ret = new HiveVarcharWritable(); + ret.set(value, getMaxLength()); + return ret; + } + + public void setTypeParams(BaseTypeParams newParams) { + typeParams = (VarcharTypeParams) newParams; + } + + public BaseTypeParams getTypeParams() { + return typeParams; + } + + public int getMaxLength() { + return typeParams != null ? typeParams.length : -1; + } +}
Index: serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/BaseTypeParams.java
===================================================================
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/BaseTypeParams.java
@@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.typeinfo; + +import java.io.Serializable; + +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.io.Writable; + +/** + * Base type for type-specific params, such as char(10) or decimal(10, 2).
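+ * For example, varchar(50) is carried as a VarcharTypeParams whose length
+ * field is 50 and whose toString() form is "(50)".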
+ */ +public abstract class BaseTypeParams implements Writable, Serializable { + + private static final long serialVersionUID = 1L; + + public abstract void validateParams() throws SerDeException; + + public abstract void populateParams(String[] params) throws SerDeException; + + public abstract String toString(); + + public void set(String[] params) throws SerDeException { + populateParams(params); + validateParams(); + } + + // Needed for conversion to/from TypeQualifiers. Override in subclasses. + public boolean hasCharacterMaximumLength() { + return false; + } + public boolean hasNumericPrecision() { + return false; + } + public boolean hasNumericScale() { + return false; + } + public int getCharacterMaximumLength() { + return -1; + } + public int getNumericPrecision() { + return -1; + } + public int getNumericScale() { + return -1; + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/ParameterizedPrimitiveTypeInfo.java =================================================================== --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/ParameterizedPrimitiveTypeInfo.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.typeinfo; + +import java.io.Serializable; + +public class ParameterizedPrimitiveTypeInfo extends PrimitiveTypeInfo + implements Serializable { + private static final long serialVersionUID = 1L; + + private BaseTypeParams typeParams; + + public ParameterizedPrimitiveTypeInfo() { + super(); + } + + public ParameterizedPrimitiveTypeInfo(String typeName, BaseTypeParams typeParams) { + super(typeName); + setParameters(typeParams); + } + + public BaseTypeParams getParameters() { + return typeParams; + } + + public void setParameters(BaseTypeParams typeParams) { + this.typeParams = typeParams; + } + +} Index: serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/ParameterizedPrimitiveTypeUtils.java =================================================================== --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/ParameterizedPrimitiveTypeUtils.java @@ -0,0 +1,91 @@ +package org.apache.hadoop.hive.serde2.typeinfo; + +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.ParameterizedObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +public class ParameterizedPrimitiveTypeUtils { + + public static BaseTypeParams getTypeParamsFromTypeInfo(TypeInfo typeInfo) { + BaseTypeParams typeParams = null; + if (typeInfo instanceof ParameterizedPrimitiveTypeInfo) { + ParameterizedPrimitiveTypeInfo ppti = (ParameterizedPrimitiveTypeInfo)typeInfo; + typeParams = ppti.getParameters(); + } + return typeParams; + } + + public static BaseTypeParams getTypeParamsFromPrimitiveTypeEntry(PrimitiveTypeEntry typeEntry) { + return typeEntry.typeParams; + } + + public static BaseTypeParams getTypeParamsFromPrimitiveObjectInspector(PrimitiveObjectInspector oi) { + BaseTypeParams typeParams = null; + if (oi instanceof ParameterizedObjectInspector) { + ParameterizedObjectInspector poi = (ParameterizedObjectInspector)oi; + typeParams = poi.getTypeParams(); + } + return typeParams; + } + + /** + * Utils for varchar type + */ + public static class HiveVarcharSerDeHelper { + public int maxLength; + public HiveVarcharWritable writable = new HiveVarcharWritable(); + + public HiveVarcharSerDeHelper(VarcharTypeParams typeParams) { + if (typeParams == null) { + throw new RuntimeException("varchar type used without type params"); + } + maxLength = typeParams.getLength(); + } + } + + public static boolean doesWritableMatchTypeParams(HiveVarcharWritable writable, VarcharTypeParams typeParams) { + return (typeParams == null || typeParams.length >= writable.getCharacterLength()); + } + + public static boolean doesPrimitiveMatchTypeParams(HiveVarchar value, VarcharTypeParams typeParams) { + return (typeParams == null || typeParams.length == value.getCharacterLength()); + } +/* + public static HiveVarchar createHiveVarcharFromParams(VarcharTypeParams typeParams) { + if (typeParams == null) { + return new HiveVarchar(-1); + } else { + return new HiveVarchar(typeParams.length); + } + } + + public static HiveVarcharWritable createHiveVarcharWritableFromParams(VarcharTypeParams typeParams) { + if (typeParams == null) { + return new HiveVarcharWritable(); + } else { + return new HiveVarcharWritable(typeParams.length); + } + } +*/ +} Index: serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeInfo.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeInfo.java +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeInfo.java @@ -31,11 +31,11 @@ * Always use the TypeInfoFactory to create new TypeInfo objects, instead of * directly creating an instance of this class. */ -public final class PrimitiveTypeInfo extends TypeInfo implements Serializable { +public class PrimitiveTypeInfo extends TypeInfo implements Serializable { private static final long serialVersionUID = 1L; - private String typeName; + protected String typeName; /** * For java serialization use only. 
@@ -59,7 +59,8 @@ } public PrimitiveCategory getPrimitiveCategory() { - return PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(typeName).primitiveCategory; + return PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName( + TypeInfoUtils.getBaseName(typeName)).primitiveCategory; } public Class<?> getPrimitiveWritableClass() {
@@ -97,4 +98,7 @@ return typeName.hashCode(); } + public String toString() { + return typeName; + } }
Index: serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfo.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfo.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfo.java
@@ -49,6 +49,15 @@ */ public abstract String getTypeName(); + /** + * String representing the qualified type name. + * Qualified types should override this method. + * @return the qualified type name, including any type parameters + */ + public String getQualifiedName() { + return getTypeName(); + } + @Override public String toString() { return getTypeName();
Index: serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
@@ -22,8 +22,11 @@ import java.util.List; import java.util.concurrent.ConcurrentHashMap; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; /** * TypeInfoFactory can be used to create the TypeInfo object for any types.
@@ -33,21 +36,36 @@ * objects that represents the same type. */ public final class TypeInfoFactory { - + private static Log LOG = LogFactory.getLog(TypeInfoFactory.class); static ConcurrentHashMap<String, TypeInfo> cachedPrimitiveTypeInfo = new ConcurrentHashMap<String, TypeInfo>(); private TypeInfoFactory() { // prevent instantiation } public static TypeInfo getPrimitiveTypeInfo(String typeName) { if (null == PrimitiveObjectInspectorUtils - .getTypeEntryFromTypeName(typeName)) { + .getTypeEntryFromTypeName(TypeInfoUtils.getBaseName(typeName))) { throw new RuntimeException("Cannot getPrimitiveTypeInfo for " + typeName); } TypeInfo result = cachedPrimitiveTypeInfo.get(typeName); if (result == null) { - result = new PrimitiveTypeInfo(typeName); + TypeInfoUtils.PrimitiveParts parts = TypeInfoUtils.parsePrimitiveParts(typeName); + // Create params if there are any + if (parts.typeParams != null && parts.typeParams.length > 0) { + // The type string came with parameters. Parse and add to TypeInfo + try { + BaseTypeParams typeParams = PrimitiveTypeEntry.createTypeParams(parts.typeName, parts.typeParams); + result = new ParameterizedPrimitiveTypeInfo(typeName, typeParams); + } catch (Exception err) { + LOG.error(err); + result = null; + } + } else { + // No type params + result = new PrimitiveTypeInfo(parts.typeName); + } - cachedPrimitiveTypeInfo.put(typeName, result); + // Don't cache a failed parse; ConcurrentHashMap rejects null values. + if (result != null) { + cachedPrimitiveTypeInfo.put(typeName, result); + } } return result;
@@ -58,6 +76,7 @@ public static final TypeInfo intTypeInfo = getPrimitiveTypeInfo(serdeConstants.INT_TYPE_NAME); public static final TypeInfo longTypeInfo = getPrimitiveTypeInfo(serdeConstants.BIGINT_TYPE_NAME); public static final TypeInfo stringTypeInfo = getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME); + public static final TypeInfo varcharTypeInfo = getPrimitiveTypeInfo(serdeConstants.VARCHAR_TYPE_NAME); public static final TypeInfo floatTypeInfo = getPrimitiveTypeInfo(serdeConstants.FLOAT_TYPE_NAME); public static final TypeInfo doubleTypeInfo = getPrimitiveTypeInfo(serdeConstants.DOUBLE_TYPE_NAME); public static final TypeInfo byteTypeInfo = getPrimitiveTypeInfo(serdeConstants.TINYINT_TYPE_NAME);
Index: serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
@@ -22,23 +22,25 @@ import java.lang.reflect.Method; import java.lang.reflect.ParameterizedType; import java.lang.reflect.Type; -import java.util.concurrent.ConcurrentHashMap; import java.util.ArrayList; import java.util.HashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
@@ -56,7 +58,7 @@ /** * Return the extended TypeInfo from a Java type. By extended TypeInfo, we * allow unknownType for java.lang.Object. - * + * @param t * The Java type. * @param m
@@ -148,7 +150,7 @@ /** * Get the parameter TypeInfo for a method.
- * + * @param size * In case the last parameter of Method is an array, we will try to * return a List with the specified size by repeating the
@@ -194,12 +196,46 @@ return typeInfos; } + public static boolean hasParameters(String typeName) { + int idx = typeName.indexOf('('); + if (idx == -1) { + return false; + } else { + return true; + } + } + + public static String getBaseName(String typeName) { + int idx = typeName.indexOf('('); + if (idx == -1) { + return typeName; + } else { + return typeName.substring(0, idx); + } + } + + /** + * Returns true if both TypeInfos are of primitive type, and the primitive category matches. + * @param ti1 + * @param ti2 + * @return true if both are primitive and share the same primitive category + */ + public static boolean doPrimitiveCategoriesMatch(TypeInfo ti1, TypeInfo ti2) { + if (ti1.getCategory() == Category.PRIMITIVE && ti2.getCategory() == Category.PRIMITIVE) { + if (((PrimitiveTypeInfo)ti1).getPrimitiveCategory() + == ((PrimitiveTypeInfo)ti2).getPrimitiveCategory()) { + return true; + } + } + return false; + } + /** * Parse a recursive TypeInfo list String. For example, the following inputs * are valid inputs: * "int,string,map<string,int>,list<map<int,list<string>>>,list<struct<a:int,b:string>>" * The separators between TypeInfos can be ",", ":", or ";". - * + * In order to use this class: TypeInfoParser parser = new * TypeInfoParser("int,string"); ArrayList<TypeInfo> typeInfos = * parser.parseTypeInfos();
@@ -225,7 +261,7 @@ * Tokenize the typeInfoString. The rule is simple: all consecutive * alphadigits and '_', '.' are in one token, and all other characters are * one character per token. - * + * tokenize("map<int,string>") should return * ["map","<","int",",","string",">"] */
@@ -281,6 +317,14 @@ return typeInfos; } + private Token peek() { + if (iToken < typeInfoTokens.size()) { + return typeInfoTokens.get(iToken); + } else { + return null; + } + } + private Token expect(String item) { return expect(item, null); }
@@ -320,6 +364,21 @@ return t; } + private String[] parseParams() { + List<String> params = new LinkedList<String>(); + + Token t = peek(); + if (t != null && t.text.equals("(")) { + expect("("); + + for (t = peek(); !t.text.equals(")"); t = expect(",", ")")) { + params.add(expect("name").text); + } + } + + return params.toArray(new String[params.size()]); + } + private TypeInfo parseType() { Token t = expect("type");
@@ -329,7 +388,11 @@ .getTypeEntryFromTypeName(t.text); if (primitiveType != null && !primitiveType.primitiveCategory.equals(PrimitiveCategory.UNKNOWN)) { - return TypeInfoFactory.getPrimitiveTypeInfo(primitiveType.typeName); + if (primitiveType.isParameterized()) { + primitiveType = primitiveType.addParameters(parseParams()); + } + return TypeInfoFactory.getPrimitiveTypeInfo(primitiveType.toString()); } // Is this a list type?
@@ -399,6 +462,26 @@ + t.position + " of '" + typeInfoString + "'"); } + public PrimitiveParts parsePrimitiveParts() { + PrimitiveParts parts = new PrimitiveParts(); + Token t = expect("type"); + parts.typeName = t.text; + parts.typeParams = parseParams(); + return parts; + } + } + + public static class PrimitiveParts { + public String typeName; + public String[] typeParams; + } + + /** + * Make some of the TypeInfo parsing available as a utility.
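+ * For example, parsePrimitiveParts("varchar(50)") returns a PrimitiveParts
+ * with typeName "varchar" and typeParams ["50"].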
+ */ + public static PrimitiveParts parsePrimitiveParts(String typeInfoString) { + TypeInfoParser parser = new TypeInfoParser(typeInfoString); + return parser.parsePrimitiveParts(); } static Map cachedStandardObjectInspector = @@ -414,9 +497,9 @@ if (result == null) { switch (typeInfo.getCategory()) { case PRIMITIVE: { - result = PrimitiveObjectInspectorFactory - .getPrimitiveWritableObjectInspector(((PrimitiveTypeInfo) typeInfo) - .getPrimitiveCategory()); + result = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(), + ParameterizedPrimitiveTypeUtils.getTypeParamsFromTypeInfo(typeInfo)); break; } case LIST: { @@ -495,7 +578,9 @@ // StandardPrimitiveObjectInspector result = PrimitiveObjectInspectorFactory .getPrimitiveJavaObjectInspector(PrimitiveObjectInspectorUtils - .getTypeEntryFromTypeName(typeInfo.getTypeName()).primitiveCategory); + .getTypeEntryFromTypeName( + typeInfo.getTypeName()).primitiveCategory, + ParameterizedPrimitiveTypeUtils.getTypeParamsFromTypeInfo(typeInfo)); break; } case LIST: { Index: serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeParams.java =================================================================== --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/VarcharTypeParams.java @@ -0,0 +1,87 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.typeinfo; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.io.Serializable; + +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.io.WritableUtils; + +public class VarcharTypeParams extends BaseTypeParams implements Serializable { + private static final long serialVersionUID = 1L; + + public int length; + + @Override + public void validateParams() throws SerDeException { + if (length < 1) { + throw new SerDeException("VARCHAR length must be positive"); + } + } + + @Override + public void populateParams(String[] params) throws SerDeException { + if (params.length != 1) { + throw new SerDeException("Invalid number of parameters for VARCHAR"); + } + try { + length = Integer.parseInt(params[0]); + } catch (NumberFormatException err) { + throw new SerDeException("Error setting VARCHAR length: " + err); + } + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("("); + sb.append(length); + sb.append(")"); + return sb.toString(); + } + + @Override + public void readFields(DataInput in) throws IOException { + length = WritableUtils.readVInt(in); + try { + validateParams(); + } catch (SerDeException err) { + throw new IOException(err); + } + } + + @Override + public void write(DataOutput out) throws IOException { + WritableUtils.writeVInt(out, length); + } + + public int getLength() { + return length; + } + + public void setLength(int len) { + length = len; + } + + @Override + public boolean hasCharacterMaximumLength() { + return true; + } + @Override + public int getCharacterMaximumLength() { + return length; + } +}
Index: service/if/TCLIService.thrift
===================================================================
--- service/if/TCLIService.thrift
+++ service/if/TCLIService.thrift
@@ -59,7 +59,8 @@ USER_DEFINED_TYPE, DECIMAL_TYPE, NULL_TYPE, - DATE_TYPE + DATE_TYPE, + VARCHAR_TYPE } const set<TTypeId> PRIMITIVE_TYPES = [
@@ -156,11 +157,19 @@ typedef i32 TTypeEntryPtr +// Type qualifiers for primitive type. +struct TTypeQualifiers { + 1: optional i32 characterMaximumLength + 2: optional i32 numericPrecision + 3: optional i32 numericScale +} + // Type entry for a primitive type. struct TPrimitiveTypeEntry { // The primitive type token. This must satisfy the condition // that type is in the PRIMITIVE_TYPES set. 1: required TTypeId type + 2: optional TTypeQualifiers typeQualifiers } // Type entry for an ARRAY type.
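For illustration only (not part of the patch): a HiveServer2 client could recover the declared maximum length of a varchar column from the new metadata roughly as follows. This is a minimal sketch; it assumes the standard Thrift-generated Java accessors for the TTypeQualifiers fields, since only the TPrimitiveTypeEntry accessors appear verbatim in this patch.

  // Hypothetical client-side helper: returns the declared varchar(n) length,
  // or -1 when the entry carries no character-length qualifier.
  static int getVarcharMaxLength(TPrimitiveTypeEntry entry) {
    if (entry.getType() == TTypeId.VARCHAR_TYPE
        && entry.isSetTypeQualifiers()
        && entry.getTypeQualifiers().isSetCharacterMaximumLength()) {
      return entry.getTypeQualifiers().getCharacterMaximumLength();
    }
    return -1;
  }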
Index: service/src/gen/thrift/gen-cpp/TCLIService_types.h =================================================================== --- service/src/gen/thrift/gen-cpp/TCLIService_types.h +++ service/src/gen/thrift/gen-cpp/TCLIService_types.h @@ -43,7 +43,8 @@ USER_DEFINED_TYPE = 14, DECIMAL_TYPE = 15, NULL_TYPE = 16, - DATE_TYPE = 17 + DATE_TYPE = 17, + VARCHAR_TYPE = 18 }; }; @@ -166,28 +167,112 @@ typedef std::string TPatternOrIdentifier; +typedef struct _TTypeQualifiers__isset { + _TTypeQualifiers__isset() : characterMaximumLength(false), numericPrecision(false), numericScale(false) {} + bool characterMaximumLength; + bool numericPrecision; + bool numericScale; +} _TTypeQualifiers__isset; + +class TTypeQualifiers { + public: + + static const char* ascii_fingerprint; // = "CE4F8A673F7F1EE0C3F9D3A7932473D6"; + static const uint8_t binary_fingerprint[16]; // = {0xCE,0x4F,0x8A,0x67,0x3F,0x7F,0x1E,0xE0,0xC3,0xF9,0xD3,0xA7,0x93,0x24,0x73,0xD6}; + + TTypeQualifiers() : characterMaximumLength(0), numericPrecision(0), numericScale(0) { + } + + virtual ~TTypeQualifiers() throw() {} + + int32_t characterMaximumLength; + int32_t numericPrecision; + int32_t numericScale; + + _TTypeQualifiers__isset __isset; + + void __set_characterMaximumLength(const int32_t val) { + characterMaximumLength = val; + __isset.characterMaximumLength = true; + } + + void __set_numericPrecision(const int32_t val) { + numericPrecision = val; + __isset.numericPrecision = true; + } + + void __set_numericScale(const int32_t val) { + numericScale = val; + __isset.numericScale = true; + } + + bool operator == (const TTypeQualifiers & rhs) const + { + if (__isset.characterMaximumLength != rhs.__isset.characterMaximumLength) + return false; + else if (__isset.characterMaximumLength && !(characterMaximumLength == rhs.characterMaximumLength)) + return false; + if (__isset.numericPrecision != rhs.__isset.numericPrecision) + return false; + else if (__isset.numericPrecision && !(numericPrecision == rhs.numericPrecision)) + return false; + if (__isset.numericScale != rhs.__isset.numericScale) + return false; + else if (__isset.numericScale && !(numericScale == rhs.numericScale)) + return false; + return true; + } + bool operator != (const TTypeQualifiers &rhs) const { + return !(*this == rhs); + } + + bool operator < (const TTypeQualifiers & ) const; + + uint32_t read(::apache::thrift::protocol::TProtocol* iprot); + uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const; + +}; + +void swap(TTypeQualifiers &a, TTypeQualifiers &b); + +typedef struct _TPrimitiveTypeEntry__isset { + _TPrimitiveTypeEntry__isset() : typeQualifiers(false) {} + bool typeQualifiers; +} _TPrimitiveTypeEntry__isset; class TPrimitiveTypeEntry { public: - static const char* ascii_fingerprint; // = "8BBB3D0C3B370CB38F2D1340BB79F0AA"; - static const uint8_t binary_fingerprint[16]; // = {0x8B,0xBB,0x3D,0x0C,0x3B,0x37,0x0C,0xB3,0x8F,0x2D,0x13,0x40,0xBB,0x79,0xF0,0xAA}; + static const char* ascii_fingerprint; // = "30105191C92191E7743519A065A66138"; + static const uint8_t binary_fingerprint[16]; // = {0x30,0x10,0x51,0x91,0xC9,0x21,0x91,0xE7,0x74,0x35,0x19,0xA0,0x65,0xA6,0x61,0x38}; TPrimitiveTypeEntry() : type((TTypeId::type)0) { } virtual ~TPrimitiveTypeEntry() throw() {} TTypeId::type type; + TTypeQualifiers typeQualifiers; + + _TPrimitiveTypeEntry__isset __isset; void __set_type(const TTypeId::type val) { type = val; } + void __set_typeQualifiers(const TTypeQualifiers& val) { + typeQualifiers = val; + __isset.typeQualifiers = true; + } + bool operator == 
(const TPrimitiveTypeEntry & rhs) const { if (!(type == rhs.type)) return false; + if (__isset.typeQualifiers != rhs.__isset.typeQualifiers) + return false; + else if (__isset.typeQualifiers && !(typeQualifiers == rhs.typeQualifiers)) + return false; return true; } bool operator != (const TPrimitiveTypeEntry &rhs) const { @@ -408,8 +493,8 @@ class TTypeEntry { public: - static const char* ascii_fingerprint; // = "0C3ACE4054603E2D37B8BFEBA79F4159"; - static const uint8_t binary_fingerprint[16]; // = {0x0C,0x3A,0xCE,0x40,0x54,0x60,0x3E,0x2D,0x37,0xB8,0xBF,0xEB,0xA7,0x9F,0x41,0x59}; + static const char* ascii_fingerprint; // = "10C0ED4977C91BA98BDEC79F56C1875D"; + static const uint8_t binary_fingerprint[16]; // = {0x10,0xC0,0xED,0x49,0x77,0xC9,0x1B,0xA9,0x8B,0xDE,0xC7,0x9F,0x56,0xC1,0x87,0x5D}; TTypeEntry() { } @@ -482,8 +567,8 @@ class TTypeDesc { public: - static const char* ascii_fingerprint; // = "60CA5B8BACFCD38D1D3EC1F0E3F1C36A"; - static const uint8_t binary_fingerprint[16]; // = {0x60,0xCA,0x5B,0x8B,0xAC,0xFC,0xD3,0x8D,0x1D,0x3E,0xC1,0xF0,0xE3,0xF1,0xC3,0x6A}; + static const char* ascii_fingerprint; // = "DB5D4F63BFF4E01C66559DB2CDEEB89A"; + static const uint8_t binary_fingerprint[16]; // = {0xDB,0x5D,0x4F,0x63,0xBF,0xF4,0xE0,0x1C,0x66,0x55,0x9D,0xB2,0xCD,0xEE,0xB8,0x9A}; TTypeDesc() { } @@ -523,8 +608,8 @@ class TColumnDesc { public: - static const char* ascii_fingerprint; // = "0DF9A37B81B1EE73D35A0AC01F33A48D"; - static const uint8_t binary_fingerprint[16]; // = {0x0D,0xF9,0xA3,0x7B,0x81,0xB1,0xEE,0x73,0xD3,0x5A,0x0A,0xC0,0x1F,0x33,0xA4,0x8D}; + static const char* ascii_fingerprint; // = "3FB4278B6C66D1131D021565AB08F16C"; + static const uint8_t binary_fingerprint[16]; // = {0x3F,0xB4,0x27,0x8B,0x6C,0x66,0xD1,0x13,0x1D,0x02,0x15,0x65,0xAB,0x08,0xF1,0x6C}; TColumnDesc() : columnName(), position(0), comment() { } @@ -586,8 +671,8 @@ class TTableSchema { public: - static const char* ascii_fingerprint; // = "E67E789F1EF836E4B9FC922C788AFDC8"; - static const uint8_t binary_fingerprint[16]; // = {0xE6,0x7E,0x78,0x9F,0x1E,0xF8,0x36,0xE4,0xB9,0xFC,0x92,0x2C,0x78,0x8A,0xFD,0xC8}; + static const char* ascii_fingerprint; // = "14F3A9B4945FE7A08507C30505B77F54"; + static const uint8_t binary_fingerprint[16]; // = {0x14,0xF3,0xA9,0xB4,0x94,0x5F,0xE7,0xA0,0x85,0x07,0xC3,0x05,0x05,0xB7,0x7F,0x54}; TTableSchema() { } @@ -3034,8 +3119,8 @@ class TGetResultSetMetadataResp { public: - static const char* ascii_fingerprint; // = "8778316D0AFC17584F192162BFF2AEDE"; - static const uint8_t binary_fingerprint[16]; // = {0x87,0x78,0x31,0x6D,0x0A,0xFC,0x17,0x58,0x4F,0x19,0x21,0x62,0xBF,0xF2,0xAE,0xDE}; + static const char* ascii_fingerprint; // = "4596D82CD0706F9565DA1D2F7A8124A3"; + static const uint8_t binary_fingerprint[16]; // = {0x45,0x96,0xD8,0x2C,0xD0,0x70,0x6F,0x95,0x65,0xDA,0x1D,0x2F,0x7A,0x81,0x24,0xA3}; TGetResultSetMetadataResp() { } Index: service/src/gen/thrift/gen-cpp/TCLIService_types.cpp =================================================================== --- service/src/gen/thrift/gen-cpp/TCLIService_types.cpp +++ service/src/gen/thrift/gen-cpp/TCLIService_types.cpp @@ -36,7 +36,8 @@ TTypeId::USER_DEFINED_TYPE, TTypeId::DECIMAL_TYPE, TTypeId::NULL_TYPE, - TTypeId::DATE_TYPE + TTypeId::DATE_TYPE, + TTypeId::VARCHAR_TYPE }; const char* _kTTypeIdNames[] = { "BOOLEAN_TYPE", @@ -56,9 +57,10 @@ "USER_DEFINED_TYPE", "DECIMAL_TYPE", "NULL_TYPE", - "DATE_TYPE" + "DATE_TYPE", + "VARCHAR_TYPE" }; -const std::map _TTypeId_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(18, _kTTypeIdValues, 
_kTTypeIdNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); +const std::map _TTypeId_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(19, _kTTypeIdValues, _kTTypeIdNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); int _kTStatusCodeValues[] = { TStatusCode::SUCCESS_STATUS, @@ -238,8 +240,99 @@ }; const std::map _TFetchOrientation_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(6, _kTFetchOrientationValues, _kTFetchOrientationNames), ::apache::thrift::TEnumIterator(-1, NULL, NULL)); -const char* TPrimitiveTypeEntry::ascii_fingerprint = "8BBB3D0C3B370CB38F2D1340BB79F0AA"; -const uint8_t TPrimitiveTypeEntry::binary_fingerprint[16] = {0x8B,0xBB,0x3D,0x0C,0x3B,0x37,0x0C,0xB3,0x8F,0x2D,0x13,0x40,0xBB,0x79,0xF0,0xAA}; +const char* TTypeQualifiers::ascii_fingerprint = "CE4F8A673F7F1EE0C3F9D3A7932473D6"; +const uint8_t TTypeQualifiers::binary_fingerprint[16] = {0xCE,0x4F,0x8A,0x67,0x3F,0x7F,0x1E,0xE0,0xC3,0xF9,0xD3,0xA7,0x93,0x24,0x73,0xD6}; + +uint32_t TTypeQualifiers::read(::apache::thrift::protocol::TProtocol* iprot) { + + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->characterMaximumLength); + this->__isset.characterMaximumLength = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->numericPrecision); + this->__isset.numericPrecision = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->numericScale); + this->__isset.numericScale = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +uint32_t TTypeQualifiers::write(::apache::thrift::protocol::TProtocol* oprot) const { + uint32_t xfer = 0; + xfer += oprot->writeStructBegin("TTypeQualifiers"); + + if (this->__isset.characterMaximumLength) { + xfer += oprot->writeFieldBegin("characterMaximumLength", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->characterMaximumLength); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.numericPrecision) { + xfer += oprot->writeFieldBegin("numericPrecision", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->numericPrecision); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.numericScale) { + xfer += oprot->writeFieldBegin("numericScale", ::apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(this->numericScale); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +void swap(TTypeQualifiers &a, TTypeQualifiers &b) { + using ::std::swap; + swap(a.characterMaximumLength, b.characterMaximumLength); + swap(a.numericPrecision, b.numericPrecision); + swap(a.numericScale, b.numericScale); + swap(a.__isset, b.__isset); +} + +const char* TPrimitiveTypeEntry::ascii_fingerprint = "30105191C92191E7743519A065A66138"; +const uint8_t TPrimitiveTypeEntry::binary_fingerprint[16] = 
{0x30,0x10,0x51,0x91,0xC9,0x21,0x91,0xE7,0x74,0x35,0x19,0xA0,0x65,0xA6,0x61,0x38}; uint32_t TPrimitiveTypeEntry::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -272,6 +365,14 @@ xfer += iprot->skip(ftype); } break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->typeQualifiers.read(iprot); + this->__isset.typeQualifiers = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -294,14 +395,21 @@ xfer += oprot->writeI32((int32_t)this->type); xfer += oprot->writeFieldEnd(); + if (this->__isset.typeQualifiers) { + xfer += oprot->writeFieldBegin("typeQualifiers", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->typeQualifiers.write(oprot); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; } void swap(TPrimitiveTypeEntry &a, TPrimitiveTypeEntry &b) { using ::std::swap; swap(a.type, b.type); + swap(a.typeQualifiers, b.typeQualifiers); + swap(a.__isset, b.__isset); } const char* TArrayTypeEntry::ascii_fingerprint = "E86CACEB22240450EDCBEFC3A83970E4"; @@ -688,8 +796,8 @@ swap(a.typeClassName, b.typeClassName); } -const char* TTypeEntry::ascii_fingerprint = "0C3ACE4054603E2D37B8BFEBA79F4159"; -const uint8_t TTypeEntry::binary_fingerprint[16] = {0x0C,0x3A,0xCE,0x40,0x54,0x60,0x3E,0x2D,0x37,0xB8,0xBF,0xEB,0xA7,0x9F,0x41,0x59}; +const char* TTypeEntry::ascii_fingerprint = "10C0ED4977C91BA98BDEC79F56C1875D"; +const uint8_t TTypeEntry::binary_fingerprint[16] = {0x10,0xC0,0xED,0x49,0x77,0xC9,0x1B,0xA9,0x8B,0xDE,0xC7,0x9F,0x56,0xC1,0x87,0x5D}; uint32_t TTypeEntry::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -815,8 +923,8 @@ swap(a.__isset, b.__isset); } -const char* TTypeDesc::ascii_fingerprint = "60CA5B8BACFCD38D1D3EC1F0E3F1C36A"; -const uint8_t TTypeDesc::binary_fingerprint[16] = {0x60,0xCA,0x5B,0x8B,0xAC,0xFC,0xD3,0x8D,0x1D,0x3E,0xC1,0xF0,0xE3,0xF1,0xC3,0x6A}; +const char* TTypeDesc::ascii_fingerprint = "DB5D4F63BFF4E01C66559DB2CDEEB89A"; +const uint8_t TTypeDesc::binary_fingerprint[16] = {0xDB,0x5D,0x4F,0x63,0xBF,0xF4,0xE0,0x1C,0x66,0x55,0x9D,0xB2,0xCD,0xEE,0xB8,0x9A}; uint32_t TTypeDesc::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -899,8 +1007,8 @@ swap(a.types, b.types); } -const char* TColumnDesc::ascii_fingerprint = "0DF9A37B81B1EE73D35A0AC01F33A48D"; -const uint8_t TColumnDesc::binary_fingerprint[16] = {0x0D,0xF9,0xA3,0x7B,0x81,0xB1,0xEE,0x73,0xD3,0x5A,0x0A,0xC0,0x1F,0x33,0xA4,0x8D}; +const char* TColumnDesc::ascii_fingerprint = "3FB4278B6C66D1131D021565AB08F16C"; +const uint8_t TColumnDesc::binary_fingerprint[16] = {0x3F,0xB4,0x27,0x8B,0x6C,0x66,0xD1,0x13,0x1D,0x02,0x15,0x65,0xAB,0x08,0xF1,0x6C}; uint32_t TColumnDesc::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -1010,8 +1118,8 @@ swap(a.__isset, b.__isset); } -const char* TTableSchema::ascii_fingerprint = "E67E789F1EF836E4B9FC922C788AFDC8"; -const uint8_t TTableSchema::binary_fingerprint[16] = {0xE6,0x7E,0x78,0x9F,0x1E,0xF8,0x36,0xE4,0xB9,0xFC,0x92,0x2C,0x78,0x8A,0xFD,0xC8}; +const char* TTableSchema::ascii_fingerprint = "14F3A9B4945FE7A08507C30505B77F54"; +const uint8_t TTableSchema::binary_fingerprint[16] = {0x14,0xF3,0xA9,0xB4,0x94,0x5F,0xE7,0xA0,0x85,0x07,0xC3,0x05,0x05,0xB7,0x7F,0x54}; uint32_t TTableSchema::read(::apache::thrift::protocol::TProtocol* iprot) { @@ -5131,8 +5239,8 @@ swap(a.operationHandle, b.operationHandle); } -const char* TGetResultSetMetadataResp::ascii_fingerprint = "8778316D0AFC17584F192162BFF2AEDE"; -const uint8_t 
TGetResultSetMetadataResp::binary_fingerprint[16] = {0x87,0x78,0x31,0x6D,0x0A,0xFC,0x17,0x58,0x4F,0x19,0x21,0x62,0xBF,0xF2,0xAE,0xDE}; +const char* TGetResultSetMetadataResp::ascii_fingerprint = "4596D82CD0706F9565DA1D2F7A8124A3"; +const uint8_t TGetResultSetMetadataResp::binary_fingerprint[16] = {0x45,0x96,0xD8,0x2C,0xD0,0x70,0x6F,0x95,0x65,0xDA,0x1D,0x2F,0x7A,0x81,0x24,0xA3}; uint32_t TGetResultSetMetadataResp::read(::apache::thrift::protocol::TProtocol* iprot) { Index: service/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/service/ThriftHive.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/service/ThriftHive.java +++ service/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/service/ThriftHive.java @@ -3023,7 +3023,7 @@ struct.success = new ArrayList(_list0.size); for (int _i1 = 0; _i1 < _list0.size; ++_i1) { - String _elem2; // required + String _elem2; // optional _elem2 = iprot.readString(); struct.success.add(_elem2); } @@ -3122,7 +3122,7 @@ struct.success = new ArrayList(_list5.size); for (int _i6 = 0; _i6 < _list5.size; ++_i6) { - String _elem7; // required + String _elem7; // optional _elem7 = iprot.readString(); struct.success.add(_elem7); } @@ -3785,7 +3785,7 @@ struct.success = new ArrayList(_list8.size); for (int _i9 = 0; _i9 < _list8.size; ++_i9) { - String _elem10; // required + String _elem10; // optional _elem10 = iprot.readString(); struct.success.add(_elem10); } @@ -3884,7 +3884,7 @@ struct.success = new ArrayList(_list13.size); for (int _i14 = 0; _i14 < _list13.size; ++_i14) { - String _elem15; // required + String _elem15; // optional _elem15 = iprot.readString(); struct.success.add(_elem15); } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TColumn.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TColumn.java +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TColumn.java @@ -259,7 +259,7 @@ boolColumn = new ArrayList(_list36.size); for (int _i37 = 0; _i37 < _list36.size; ++_i37) { - TBoolValue _elem38; // required + TBoolValue _elem38; // optional _elem38 = new TBoolValue(); _elem38.read(iprot); boolColumn.add(_elem38); @@ -279,7 +279,7 @@ byteColumn = new ArrayList(_list39.size); for (int _i40 = 0; _i40 < _list39.size; ++_i40) { - TByteValue _elem41; // required + TByteValue _elem41; // optional _elem41 = new TByteValue(); _elem41.read(iprot); byteColumn.add(_elem41); @@ -299,7 +299,7 @@ i16Column = new ArrayList(_list42.size); for (int _i43 = 0; _i43 < _list42.size; ++_i43) { - TI16Value _elem44; // required + TI16Value _elem44; // optional _elem44 = new TI16Value(); _elem44.read(iprot); i16Column.add(_elem44); @@ -319,7 +319,7 @@ i32Column = new ArrayList(_list45.size); for (int _i46 = 0; _i46 < _list45.size; ++_i46) { - TI32Value _elem47; // required + TI32Value _elem47; // optional _elem47 = new TI32Value(); _elem47.read(iprot); i32Column.add(_elem47); @@ -339,7 +339,7 @@ i64Column = new ArrayList(_list48.size); for (int _i49 = 0; _i49 < _list48.size; ++_i49) { - TI64Value _elem50; // required + TI64Value _elem50; // optional _elem50 = new TI64Value(); _elem50.read(iprot); i64Column.add(_elem50); @@ -359,7 +359,7 @@ doubleColumn = new ArrayList(_list51.size); for (int _i52 = 0; _i52 < _list51.size; ++_i52) { - TDoubleValue _elem53; // required + TDoubleValue _elem53; // optional _elem53 = new TDoubleValue(); 
_elem53.read(iprot); doubleColumn.add(_elem53); @@ -379,7 +379,7 @@ stringColumn = new ArrayList(_list54.size); for (int _i55 = 0; _i55 < _list54.size; ++_i55) { - TStringValue _elem56; // required + TStringValue _elem56; // optional _elem56 = new TStringValue(); _elem56.read(iprot); stringColumn.add(_elem56); @@ -496,7 +496,7 @@ boolColumn = new ArrayList(_list64.size); for (int _i65 = 0; _i65 < _list64.size; ++_i65) { - TBoolValue _elem66; // required + TBoolValue _elem66; // optional _elem66 = new TBoolValue(); _elem66.read(iprot); boolColumn.add(_elem66); @@ -511,7 +511,7 @@ byteColumn = new ArrayList(_list67.size); for (int _i68 = 0; _i68 < _list67.size; ++_i68) { - TByteValue _elem69; // required + TByteValue _elem69; // optional _elem69 = new TByteValue(); _elem69.read(iprot); byteColumn.add(_elem69); @@ -526,7 +526,7 @@ i16Column = new ArrayList(_list70.size); for (int _i71 = 0; _i71 < _list70.size; ++_i71) { - TI16Value _elem72; // required + TI16Value _elem72; // optional _elem72 = new TI16Value(); _elem72.read(iprot); i16Column.add(_elem72); @@ -541,7 +541,7 @@ i32Column = new ArrayList(_list73.size); for (int _i74 = 0; _i74 < _list73.size; ++_i74) { - TI32Value _elem75; // required + TI32Value _elem75; // optional _elem75 = new TI32Value(); _elem75.read(iprot); i32Column.add(_elem75); @@ -556,7 +556,7 @@ i64Column = new ArrayList(_list76.size); for (int _i77 = 0; _i77 < _list76.size; ++_i77) { - TI64Value _elem78; // required + TI64Value _elem78; // optional _elem78 = new TI64Value(); _elem78.read(iprot); i64Column.add(_elem78); @@ -571,7 +571,7 @@ doubleColumn = new ArrayList(_list79.size); for (int _i80 = 0; _i80 < _list79.size; ++_i80) { - TDoubleValue _elem81; // required + TDoubleValue _elem81; // optional _elem81 = new TDoubleValue(); _elem81.read(iprot); doubleColumn.add(_elem81); @@ -586,7 +586,7 @@ stringColumn = new ArrayList(_list82.size); for (int _i83 = 0; _i83 < _list82.size; ++_i83) { - TStringValue _elem84; // required + TStringValue _elem84; // optional _elem84 = new TStringValue(); _elem84.read(iprot); stringColumn.add(_elem84); Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TGetTablesReq.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TGetTablesReq.java +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TGetTablesReq.java @@ -715,7 +715,7 @@ struct.tableTypes = new ArrayList(_list154.size); for (int _i155 = 0; _i155 < _list154.size; ++_i155) { - String _elem156; // required + String _elem156; // optional _elem156 = iprot.readString(); struct.tableTypes.add(_elem156); } @@ -856,7 +856,7 @@ struct.tableTypes = new ArrayList(_list159.size); for (int _i160 = 0; _i160 < _list159.size; ++_i160) { - String _elem161; // required + String _elem161; // optional _elem161 = iprot.readString(); struct.tableTypes.add(_elem161); } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TPrimitiveTypeEntry.java @@ -35,22 +35,25 @@ private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TPrimitiveTypeEntry"); private static final org.apache.thrift.protocol.TField TYPE_FIELD_DESC = new 
org.apache.thrift.protocol.TField("type", org.apache.thrift.protocol.TType.I32, (short)1); + private static final org.apache.thrift.protocol.TField TYPE_QUALIFIERS_FIELD_DESC = new org.apache.thrift.protocol.TField("typeQualifiers", org.apache.thrift.protocol.TType.STRUCT, (short)2); private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); static { schemes.put(StandardScheme.class, new TPrimitiveTypeEntryStandardSchemeFactory()); schemes.put(TupleScheme.class, new TPrimitiveTypeEntryTupleSchemeFactory()); } private TTypeId type; // required + private TTypeQualifiers typeQualifiers; // optional /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ public enum _Fields implements org.apache.thrift.TFieldIdEnum { /** * * @see TTypeId */ - TYPE((short)1, "type"); + TYPE((short)1, "type"), + TYPE_QUALIFIERS((short)2, "typeQualifiers"); private static final Map byName = new HashMap(); @@ -67,6 +70,8 @@ switch(fieldId) { case 1: // TYPE return TYPE; + case 2: // TYPE_QUALIFIERS + return TYPE_QUALIFIERS; default: return null; } @@ -107,11 +112,14 @@ } // isset id assignments + private _Fields optionals[] = {_Fields.TYPE_QUALIFIERS}; public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; static { Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); tmpMap.put(_Fields.TYPE, new org.apache.thrift.meta_data.FieldMetaData("type", org.apache.thrift.TFieldRequirementType.REQUIRED, new org.apache.thrift.meta_data.EnumMetaData(org.apache.thrift.protocol.TType.ENUM, TTypeId.class))); + tmpMap.put(_Fields.TYPE_QUALIFIERS, new org.apache.thrift.meta_data.FieldMetaData("typeQualifiers", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TTypeQualifiers.class))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TPrimitiveTypeEntry.class, metaDataMap); } @@ -133,6 +141,9 @@ if (other.isSetType()) { this.type = other.type; } + if (other.isSetTypeQualifiers()) { + this.typeQualifiers = new TTypeQualifiers(other.typeQualifiers); + } } public TPrimitiveTypeEntry deepCopy() { @@ -142,6 +153,7 @@ @Override public void clear() { this.type = null; + this.typeQualifiers = null; } /** @@ -175,6 +187,29 @@ } } + public TTypeQualifiers getTypeQualifiers() { + return this.typeQualifiers; + } + + public void setTypeQualifiers(TTypeQualifiers typeQualifiers) { + this.typeQualifiers = typeQualifiers; + } + + public void unsetTypeQualifiers() { + this.typeQualifiers = null; + } + + /** Returns true if field typeQualifiers is set (has been assigned a value) and false otherwise */ + public boolean isSetTypeQualifiers() { + return this.typeQualifiers != null; + } + + public void setTypeQualifiersIsSet(boolean value) { + if (!value) { + this.typeQualifiers = null; + } + } + public void setFieldValue(_Fields field, Object value) { switch (field) { case TYPE: @@ -185,14 +220,25 @@ } break; + case TYPE_QUALIFIERS: + if (value == null) { + unsetTypeQualifiers(); + } else { + setTypeQualifiers((TTypeQualifiers)value); + } + break; + } } public Object getFieldValue(_Fields field) { switch (field) { case TYPE: return getType(); + case TYPE_QUALIFIERS: + return getTypeQualifiers(); + } throw new IllegalStateException(); } @@ -206,6 +252,8 @@ switch (field) { case TYPE: return 
isSetType(); + case TYPE_QUALIFIERS: + return isSetTypeQualifiers(); } throw new IllegalStateException(); } @@ -232,6 +280,15 @@ return false; } + boolean this_present_typeQualifiers = true && this.isSetTypeQualifiers(); + boolean that_present_typeQualifiers = true && that.isSetTypeQualifiers(); + if (this_present_typeQualifiers || that_present_typeQualifiers) { + if (!(this_present_typeQualifiers && that_present_typeQualifiers)) + return false; + if (!this.typeQualifiers.equals(that.typeQualifiers)) + return false; + } + return true; } @@ -244,6 +301,11 @@ if (present_type) builder.append(type.getValue()); + boolean present_typeQualifiers = true && (isSetTypeQualifiers()); + builder.append(present_typeQualifiers); + if (present_typeQualifiers) + builder.append(typeQualifiers); + return builder.toHashCode(); } @@ -265,6 +327,16 @@ return lastComparison; } } + lastComparison = Boolean.valueOf(isSetTypeQualifiers()).compareTo(typedOther.isSetTypeQualifiers()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetTypeQualifiers()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.typeQualifiers, typedOther.typeQualifiers); + if (lastComparison != 0) { + return lastComparison; + } + } return 0; } @@ -292,6 +364,16 @@ sb.append(this.type); } first = false; + if (isSetTypeQualifiers()) { + if (!first) sb.append(", "); + sb.append("typeQualifiers:"); + if (this.typeQualifiers == null) { + sb.append("null"); + } else { + sb.append(this.typeQualifiers); + } + first = false; + } sb.append(")"); return sb.toString(); } @@ -303,6 +385,9 @@ } // check for sub-struct validity + if (typeQualifiers != null) { + typeQualifiers.validate(); + } } private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { @@ -347,6 +432,15 @@ org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } break; + case 2: // TYPE_QUALIFIERS + if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { + struct.typeQualifiers = new TTypeQualifiers(); + struct.typeQualifiers.read(iprot); + struct.setTypeQualifiersIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; default: org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); } @@ -365,6 +459,13 @@ oprot.writeI32(struct.type.getValue()); oprot.writeFieldEnd(); } + if (struct.typeQualifiers != null) { + if (struct.isSetTypeQualifiers()) { + oprot.writeFieldBegin(TYPE_QUALIFIERS_FIELD_DESC); + struct.typeQualifiers.write(oprot); + oprot.writeFieldEnd(); + } + } oprot.writeFieldStop(); oprot.writeStructEnd(); } @@ -383,13 +484,27 @@ public void write(org.apache.thrift.protocol.TProtocol prot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { TTupleProtocol oprot = (TTupleProtocol) prot; oprot.writeI32(struct.type.getValue()); + BitSet optionals = new BitSet(); + if (struct.isSetTypeQualifiers()) { + optionals.set(0); + } + oprot.writeBitSet(optionals, 1); + if (struct.isSetTypeQualifiers()) { + struct.typeQualifiers.write(oprot); + } } @Override public void read(org.apache.thrift.protocol.TProtocol prot, TPrimitiveTypeEntry struct) throws org.apache.thrift.TException { TTupleProtocol iprot = (TTupleProtocol) prot; struct.type = TTypeId.findByValue(iprot.readI32()); struct.setTypeIsSet(true); + BitSet incoming = iprot.readBitSet(1); + if (incoming.get(0)) { + struct.typeQualifiers = new TTypeQualifiers(); + struct.typeQualifiers.read(iprot); + struct.setTypeQualifiersIsSet(true); + } } } Index: 
service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TRow.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TRow.java +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TRow.java @@ -354,7 +354,7 @@ struct.colVals = new ArrayList<TColumnValue>(_list92.size); for (int _i93 = 0; _i93 < _list92.size; ++_i93) { - TColumnValue _elem94; // required + TColumnValue _elem94; // optional _elem94 = new TColumnValue(); _elem94.read(iprot); struct.colVals.add(_elem94); @@ -425,7 +425,7 @@ struct.colVals = new ArrayList<TColumnValue>(_list97.size); for (int _i98 = 0; _i98 < _list97.size; ++_i98) { - TColumnValue _elem99; // required + TColumnValue _elem99; // optional _elem99 = new TColumnValue(); _elem99.read(iprot); struct.colVals.add(_elem99); Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TRowSet.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TRowSet.java +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TRowSet.java @@ -549,7 +549,7 @@ struct.rows = new ArrayList<TRow>(_list100.size); for (int _i101 = 0; _i101 < _list100.size; ++_i101) { - TRow _elem102; // required + TRow _elem102; // optional _elem102 = new TRow(); _elem102.read(iprot); struct.rows.add(_elem102); @@ -568,7 +568,7 @@ struct.columns = new ArrayList<TColumn>(_list103.size); for (int _i104 = 0; _i104 < _list103.size; ++_i104) { - TColumn _elem105; // required + TColumn _elem105; // optional _elem105 = new TColumn(); _elem105.read(iprot); struct.columns.add(_elem105); @@ -673,7 +673,7 @@ struct.rows = new ArrayList<TRow>(_list110.size); for (int _i111 = 0; _i111 < _list110.size; ++_i111) { - TRow _elem112; // required + TRow _elem112; // optional _elem112 = new TRow(); _elem112.read(iprot); struct.rows.add(_elem112); @@ -687,7 +687,7 @@ struct.columns = new ArrayList<TColumn>(_list113.size); for (int _i114 = 0; _i114 < _list113.size; ++_i114) { - TColumn _elem115; // required + TColumn _elem115; // optional _elem115 = new TColumn(); _elem115.read(iprot); struct.columns.add(_elem115); Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TStatus.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TStatus.java +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TStatus.java @@ -698,7 +698,7 @@ struct.infoMessages = new ArrayList<String>(_list116.size); for (int _i117 = 0; _i117 < _list116.size; ++_i117) { - String _elem118; // required + String _elem118; // optional _elem118 = iprot.readString(); struct.infoMessages.add(_elem118); } @@ -848,7 +848,7 @@ struct.infoMessages = new ArrayList<String>(_list121.size); for (int _i122 = 0; _i122 < _list121.size; ++_i122) { - String _elem123; // required + String _elem123; // optional _elem123 = iprot.readString(); struct.infoMessages.add(_elem123); } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTableSchema.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTableSchema.java +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTableSchema.java @@ -354,7 +354,7 @@ struct.columns = new ArrayList<TColumnDesc>(_list28.size); for (int _i29 = 0; _i29 < _list28.size; ++_i29) { - TColumnDesc _elem30; //
required + TColumnDesc _elem30; // optional _elem30 = new TColumnDesc(); _elem30.read(iprot); struct.columns.add(_elem30); @@ -425,7 +425,7 @@ struct.columns = new ArrayList<TColumnDesc>(_list33.size); for (int _i34 = 0; _i34 < _list33.size; ++_i34) { - TColumnDesc _elem35; // required + TColumnDesc _elem35; // optional _elem35 = new TColumnDesc(); _elem35.read(iprot); struct.columns.add(_elem35); Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTypeDesc.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTypeDesc.java +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTypeDesc.java @@ -354,7 +354,7 @@ struct.types = new ArrayList<TTypeEntry>(_list20.size); for (int _i21 = 0; _i21 < _list20.size; ++_i21) { - TTypeEntry _elem22; // required + TTypeEntry _elem22; // optional _elem22 = new TTypeEntry(); _elem22.read(iprot); struct.types.add(_elem22); @@ -425,7 +425,7 @@ struct.types = new ArrayList<TTypeEntry>(_list25.size); for (int _i26 = 0; _i26 < _list25.size; ++_i26) { - TTypeEntry _elem27; // required + TTypeEntry _elem27; // optional _elem27 = new TTypeEntry(); _elem27.read(iprot); struct.types.add(_elem27); Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTypeId.java =================================================================== --- service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTypeId.java +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTypeId.java @@ -29,7 +29,8 @@ USER_DEFINED_TYPE(14), DECIMAL_TYPE(15), NULL_TYPE(16), - DATE_TYPE(17); + DATE_TYPE(17), + VARCHAR_TYPE(18); private final int value; @@ -86,6 +87,8 @@ return NULL_TYPE; case 17: return DATE_TYPE; + case 18: + return VARCHAR_TYPE; default: return null; } Index: service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTypeQualifiers.java =================================================================== --- /dev/null +++ service/src/gen/thrift/gen-javabean/org/apache/hive/service/cli/thrift/TTypeQualifiers.java @@ -0,0 +1,584 @@ +/** + * Autogenerated by Thrift Compiler (0.9.0) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + * @generated + */ +package org.apache.hive.service.cli.thrift; + +import org.apache.commons.lang.builder.HashCodeBuilder; +import org.apache.thrift.scheme.IScheme; +import org.apache.thrift.scheme.SchemeFactory; +import org.apache.thrift.scheme.StandardScheme; + +import org.apache.thrift.scheme.TupleScheme; +import org.apache.thrift.protocol.TTupleProtocol; +import org.apache.thrift.protocol.TProtocolException; +import org.apache.thrift.EncodingUtils; +import org.apache.thrift.TException; +import java.util.List; +import java.util.ArrayList; +import java.util.Map; +import java.util.HashMap; +import java.util.EnumMap; +import java.util.Set; +import java.util.HashSet; +import java.util.EnumSet; +import java.util.Collections; +import java.util.BitSet; +import java.nio.ByteBuffer; +import java.util.Arrays; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TTypeQualifiers implements org.apache.thrift.TBase<TTypeQualifiers, TTypeQualifiers._Fields>, java.io.Serializable, Cloneable { + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TTypeQualifiers"); + + private static final org.apache.thrift.protocol.TField CHARACTER_MAXIMUM_LENGTH_FIELD_DESC = new org.apache.thrift.protocol.TField("characterMaximumLength",
org.apache.thrift.protocol.TType.I32, (short)1); + private static final org.apache.thrift.protocol.TField NUMERIC_PRECISION_FIELD_DESC = new org.apache.thrift.protocol.TField("numericPrecision", org.apache.thrift.protocol.TType.I32, (short)2); + private static final org.apache.thrift.protocol.TField NUMERIC_SCALE_FIELD_DESC = new org.apache.thrift.protocol.TField("numericScale", org.apache.thrift.protocol.TType.I32, (short)3); + + private static final Map<Class<? extends IScheme>, SchemeFactory> schemes = new HashMap<Class<? extends IScheme>, SchemeFactory>(); + static { + schemes.put(StandardScheme.class, new TTypeQualifiersStandardSchemeFactory()); + schemes.put(TupleScheme.class, new TTypeQualifiersTupleSchemeFactory()); + } + + private int characterMaximumLength; // optional + private int numericPrecision; // optional + private int numericScale; // optional + + /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ + public enum _Fields implements org.apache.thrift.TFieldIdEnum { + CHARACTER_MAXIMUM_LENGTH((short)1, "characterMaximumLength"), + NUMERIC_PRECISION((short)2, "numericPrecision"), + NUMERIC_SCALE((short)3, "numericScale"); + + private static final Map<String, _Fields> byName = new HashMap<String, _Fields>(); + + static { + for (_Fields field : EnumSet.allOf(_Fields.class)) { + byName.put(field.getFieldName(), field); + } + } + + /** + * Find the _Fields constant that matches fieldId, or null if its not found. + */ + public static _Fields findByThriftId(int fieldId) { + switch(fieldId) { + case 1: // CHARACTER_MAXIMUM_LENGTH + return CHARACTER_MAXIMUM_LENGTH; + case 2: // NUMERIC_PRECISION + return NUMERIC_PRECISION; + case 3: // NUMERIC_SCALE + return NUMERIC_SCALE; + default: + return null; + } + } + + /** + * Find the _Fields constant that matches fieldId, throwing an exception + * if it is not found. + */ + public static _Fields findByThriftIdOrThrow(int fieldId) { + _Fields fields = findByThriftId(fieldId); + if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); + return fields; + } + + /** + * Find the _Fields constant that matches name, or null if its not found.
+ */ + public static _Fields findByName(String name) { + return byName.get(name); + } + + private final short _thriftId; + private final String _fieldName; + + _Fields(short thriftId, String fieldName) { + _thriftId = thriftId; + _fieldName = fieldName; + } + + public short getThriftFieldId() { + return _thriftId; + } + + public String getFieldName() { + return _fieldName; + } + } + + // isset id assignments + private static final int __CHARACTERMAXIMUMLENGTH_ISSET_ID = 0; + private static final int __NUMERICPRECISION_ISSET_ID = 1; + private static final int __NUMERICSCALE_ISSET_ID = 2; + private byte __isset_bitfield = 0; + private _Fields optionals[] = {_Fields.CHARACTER_MAXIMUM_LENGTH,_Fields.NUMERIC_PRECISION,_Fields.NUMERIC_SCALE}; + public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; + static { + Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); + tmpMap.put(_Fields.CHARACTER_MAXIMUM_LENGTH, new org.apache.thrift.meta_data.FieldMetaData("characterMaximumLength", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); + tmpMap.put(_Fields.NUMERIC_PRECISION, new org.apache.thrift.meta_data.FieldMetaData("numericPrecision", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); + tmpMap.put(_Fields.NUMERIC_SCALE, new org.apache.thrift.meta_data.FieldMetaData("numericScale", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I32))); + metaDataMap = Collections.unmodifiableMap(tmpMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TTypeQualifiers.class, metaDataMap); + } + + public TTypeQualifiers() { + } + + /** + * Performs a deep copy on other. 
+ */ + public TTypeQualifiers(TTypeQualifiers other) { + __isset_bitfield = other.__isset_bitfield; + this.characterMaximumLength = other.characterMaximumLength; + this.numericPrecision = other.numericPrecision; + this.numericScale = other.numericScale; + } + + public TTypeQualifiers deepCopy() { + return new TTypeQualifiers(this); + } + + @Override + public void clear() { + setCharacterMaximumLengthIsSet(false); + this.characterMaximumLength = 0; + setNumericPrecisionIsSet(false); + this.numericPrecision = 0; + setNumericScaleIsSet(false); + this.numericScale = 0; + } + + public int getCharacterMaximumLength() { + return this.characterMaximumLength; + } + + public void setCharacterMaximumLength(int characterMaximumLength) { + this.characterMaximumLength = characterMaximumLength; + setCharacterMaximumLengthIsSet(true); + } + + public void unsetCharacterMaximumLength() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __CHARACTERMAXIMUMLENGTH_ISSET_ID); + } + + /** Returns true if field characterMaximumLength is set (has been assigned a value) and false otherwise */ + public boolean isSetCharacterMaximumLength() { + return EncodingUtils.testBit(__isset_bitfield, __CHARACTERMAXIMUMLENGTH_ISSET_ID); + } + + public void setCharacterMaximumLengthIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __CHARACTERMAXIMUMLENGTH_ISSET_ID, value); + } + + public int getNumericPrecision() { + return this.numericPrecision; + } + + public void setNumericPrecision(int numericPrecision) { + this.numericPrecision = numericPrecision; + setNumericPrecisionIsSet(true); + } + + public void unsetNumericPrecision() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __NUMERICPRECISION_ISSET_ID); + } + + /** Returns true if field numericPrecision is set (has been assigned a value) and false otherwise */ + public boolean isSetNumericPrecision() { + return EncodingUtils.testBit(__isset_bitfield, __NUMERICPRECISION_ISSET_ID); + } + + public void setNumericPrecisionIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMERICPRECISION_ISSET_ID, value); + } + + public int getNumericScale() { + return this.numericScale; + } + + public void setNumericScale(int numericScale) { + this.numericScale = numericScale; + setNumericScaleIsSet(true); + } + + public void unsetNumericScale() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __NUMERICSCALE_ISSET_ID); + } + + /** Returns true if field numericScale is set (has been assigned a value) and false otherwise */ + public boolean isSetNumericScale() { + return EncodingUtils.testBit(__isset_bitfield, __NUMERICSCALE_ISSET_ID); + } + + public void setNumericScaleIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMERICSCALE_ISSET_ID, value); + } + + public void setFieldValue(_Fields field, Object value) { + switch (field) { + case CHARACTER_MAXIMUM_LENGTH: + if (value == null) { + unsetCharacterMaximumLength(); + } else { + setCharacterMaximumLength((Integer)value); + } + break; + + case NUMERIC_PRECISION: + if (value == null) { + unsetNumericPrecision(); + } else { + setNumericPrecision((Integer)value); + } + break; + + case NUMERIC_SCALE: + if (value == null) { + unsetNumericScale(); + } else { + setNumericScale((Integer)value); + } + break; + + } + } + + public Object getFieldValue(_Fields field) { + switch (field) { + case CHARACTER_MAXIMUM_LENGTH: + return Integer.valueOf(getCharacterMaximumLength()); + + case NUMERIC_PRECISION: + return 
Integer.valueOf(getNumericPrecision()); + + case NUMERIC_SCALE: + return Integer.valueOf(getNumericScale()); + + } + throw new IllegalStateException(); + } + + /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ + public boolean isSet(_Fields field) { + if (field == null) { + throw new IllegalArgumentException(); + } + + switch (field) { + case CHARACTER_MAXIMUM_LENGTH: + return isSetCharacterMaximumLength(); + case NUMERIC_PRECISION: + return isSetNumericPrecision(); + case NUMERIC_SCALE: + return isSetNumericScale(); + } + throw new IllegalStateException(); + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof TTypeQualifiers) + return this.equals((TTypeQualifiers)that); + return false; + } + + public boolean equals(TTypeQualifiers that) { + if (that == null) + return false; + + boolean this_present_characterMaximumLength = true && this.isSetCharacterMaximumLength(); + boolean that_present_characterMaximumLength = true && that.isSetCharacterMaximumLength(); + if (this_present_characterMaximumLength || that_present_characterMaximumLength) { + if (!(this_present_characterMaximumLength && that_present_characterMaximumLength)) + return false; + if (this.characterMaximumLength != that.characterMaximumLength) + return false; + } + + boolean this_present_numericPrecision = true && this.isSetNumericPrecision(); + boolean that_present_numericPrecision = true && that.isSetNumericPrecision(); + if (this_present_numericPrecision || that_present_numericPrecision) { + if (!(this_present_numericPrecision && that_present_numericPrecision)) + return false; + if (this.numericPrecision != that.numericPrecision) + return false; + } + + boolean this_present_numericScale = true && this.isSetNumericScale(); + boolean that_present_numericScale = true && that.isSetNumericScale(); + if (this_present_numericScale || that_present_numericScale) { + if (!(this_present_numericScale && that_present_numericScale)) + return false; + if (this.numericScale != that.numericScale) + return false; + } + + return true; + } + + @Override + public int hashCode() { + HashCodeBuilder builder = new HashCodeBuilder(); + + boolean present_characterMaximumLength = true && (isSetCharacterMaximumLength()); + builder.append(present_characterMaximumLength); + if (present_characterMaximumLength) + builder.append(characterMaximumLength); + + boolean present_numericPrecision = true && (isSetNumericPrecision()); + builder.append(present_numericPrecision); + if (present_numericPrecision) + builder.append(numericPrecision); + + boolean present_numericScale = true && (isSetNumericScale()); + builder.append(present_numericScale); + if (present_numericScale) + builder.append(numericScale); + + return builder.toHashCode(); + } + + public int compareTo(TTypeQualifiers other) { + if (!getClass().equals(other.getClass())) { + return getClass().getName().compareTo(other.getClass().getName()); + } + + int lastComparison = 0; + TTypeQualifiers typedOther = (TTypeQualifiers)other; + + lastComparison = Boolean.valueOf(isSetCharacterMaximumLength()).compareTo(typedOther.isSetCharacterMaximumLength()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetCharacterMaximumLength()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.characterMaximumLength, typedOther.characterMaximumLength); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = 
Boolean.valueOf(isSetNumericPrecision()).compareTo(typedOther.isSetNumericPrecision()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetNumericPrecision()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.numericPrecision, typedOther.numericPrecision); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetNumericScale()).compareTo(typedOther.isSetNumericScale()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetNumericScale()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.numericScale, typedOther.numericScale); + if (lastComparison != 0) { + return lastComparison; + } + } + return 0; + } + + public _Fields fieldForId(int fieldId) { + return _Fields.findByThriftId(fieldId); + } + + public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { + schemes.get(iprot.getScheme()).getScheme().read(iprot, this); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { + schemes.get(oprot.getScheme()).getScheme().write(oprot, this); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("TTypeQualifiers("); + boolean first = true; + + if (isSetCharacterMaximumLength()) { + sb.append("characterMaximumLength:"); + sb.append(this.characterMaximumLength); + first = false; + } + if (isSetNumericPrecision()) { + if (!first) sb.append(", "); + sb.append("numericPrecision:"); + sb.append(this.numericPrecision); + first = false; + } + if (isSetNumericScale()) { + if (!first) sb.append(", "); + sb.append("numericScale:"); + sb.append(this.numericScale); + first = false; + } + sb.append(")"); + return sb.toString(); + } + + public void validate() throws org.apache.thrift.TException { + // check for required fields + // check for sub-struct validity + } + + private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { + try { + write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { + try { + // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
__isset_bitfield = 0; + read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private static class TTypeQualifiersStandardSchemeFactory implements SchemeFactory { + public TTypeQualifiersStandardScheme getScheme() { + return new TTypeQualifiersStandardScheme(); + } + } + + private static class TTypeQualifiersStandardScheme extends StandardScheme<TTypeQualifiers> { + + public void read(org.apache.thrift.protocol.TProtocol iprot, TTypeQualifiers struct) throws org.apache.thrift.TException { + org.apache.thrift.protocol.TField schemeField; + iprot.readStructBegin(); + while (true) + { + schemeField = iprot.readFieldBegin(); + if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { + break; + } + switch (schemeField.id) { + case 1: // CHARACTER_MAXIMUM_LENGTH + if (schemeField.type == org.apache.thrift.protocol.TType.I32) { + struct.characterMaximumLength = iprot.readI32(); + struct.setCharacterMaximumLengthIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 2: // NUMERIC_PRECISION + if (schemeField.type == org.apache.thrift.protocol.TType.I32) { + struct.numericPrecision = iprot.readI32(); + struct.setNumericPrecisionIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 3: // NUMERIC_SCALE + if (schemeField.type == org.apache.thrift.protocol.TType.I32) { + struct.numericScale = iprot.readI32(); + struct.setNumericScaleIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + default: + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + iprot.readFieldEnd(); + } + iprot.readStructEnd(); + struct.validate(); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot, TTypeQualifiers struct) throws org.apache.thrift.TException { + struct.validate(); + + oprot.writeStructBegin(STRUCT_DESC); + if (struct.isSetCharacterMaximumLength()) { + oprot.writeFieldBegin(CHARACTER_MAXIMUM_LENGTH_FIELD_DESC); + oprot.writeI32(struct.characterMaximumLength); + oprot.writeFieldEnd(); + } + if (struct.isSetNumericPrecision()) { + oprot.writeFieldBegin(NUMERIC_PRECISION_FIELD_DESC); + oprot.writeI32(struct.numericPrecision); + oprot.writeFieldEnd(); + } + if (struct.isSetNumericScale()) { + oprot.writeFieldBegin(NUMERIC_SCALE_FIELD_DESC); + oprot.writeI32(struct.numericScale); + oprot.writeFieldEnd(); + } + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + } + + private static class TTypeQualifiersTupleSchemeFactory implements SchemeFactory { + public TTypeQualifiersTupleScheme getScheme() { + return new TTypeQualifiersTupleScheme(); + } + } + + private static class TTypeQualifiersTupleScheme extends TupleScheme<TTypeQualifiers> { + + @Override + public void write(org.apache.thrift.protocol.TProtocol prot, TTypeQualifiers struct) throws org.apache.thrift.TException { + TTupleProtocol oprot = (TTupleProtocol) prot; + BitSet optionals = new BitSet(); + if (struct.isSetCharacterMaximumLength()) { + optionals.set(0); + } + if (struct.isSetNumericPrecision()) { + optionals.set(1); + } + if (struct.isSetNumericScale()) { + optionals.set(2); + } + oprot.writeBitSet(optionals, 3); + if (struct.isSetCharacterMaximumLength()) { + oprot.writeI32(struct.characterMaximumLength); + } + if (struct.isSetNumericPrecision()) { + oprot.writeI32(struct.numericPrecision); + } + if
(struct.isSetNumericScale()) { + oprot.writeI32(struct.numericScale); + } + } + + @Override + public void read(org.apache.thrift.protocol.TProtocol prot, TTypeQualifiers struct) throws org.apache.thrift.TException { + TTupleProtocol iprot = (TTupleProtocol) prot; + BitSet incoming = iprot.readBitSet(3); + if (incoming.get(0)) { + struct.characterMaximumLength = iprot.readI32(); + struct.setCharacterMaximumLengthIsSet(true); + } + if (incoming.get(1)) { + struct.numericPrecision = iprot.readI32(); + struct.setNumericPrecisionIsSet(true); + } + if (incoming.get(2)) { + struct.numericScale = iprot.readI32(); + struct.setNumericScaleIsSet(true); + } + } + } + +} + Index: service/src/gen/thrift/gen-py/TCLIService/ttypes.py =================================================================== --- service/src/gen/thrift/gen-py/TCLIService/ttypes.py +++ service/src/gen/thrift/gen-py/TCLIService/ttypes.py @@ -46,6 +46,7 @@ DECIMAL_TYPE = 15 NULL_TYPE = 16 DATE_TYPE = 17 + VARCHAR_TYPE = 18 _VALUES_TO_NAMES = { 0: "BOOLEAN_TYPE", @@ -66,6 +67,7 @@ 15: "DECIMAL_TYPE", 16: "NULL_TYPE", 17: "DATE_TYPE", + 18: "VARCHAR_TYPE", } _NAMES_TO_VALUES = { @@ -87,6 +89,7 @@ "DECIMAL_TYPE": 15, "NULL_TYPE": 16, "DATE_TYPE": 17, + "VARCHAR_TYPE": 18, } class TStatusCode: @@ -352,19 +355,106 @@ } +class TTypeQualifiers: + """ + Attributes: + - characterMaximumLength + - numericPrecision + - numericScale + """ + + thrift_spec = ( + None, # 0 + (1, TType.I32, 'characterMaximumLength', None, None, ), # 1 + (2, TType.I32, 'numericPrecision', None, None, ), # 2 + (3, TType.I32, 'numericScale', None, None, ), # 3 + ) + + def __init__(self, characterMaximumLength=None, numericPrecision=None, numericScale=None,): + self.characterMaximumLength = characterMaximumLength + self.numericPrecision = numericPrecision + self.numericScale = numericScale + + def read(self, iprot): + if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: + fastbinary.decode_binary(self, iprot.trans, (self.__class__, self.thrift_spec)) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.I32: + self.characterMaximumLength = iprot.readI32(); + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.I32: + self.numericPrecision = iprot.readI32(); + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.I32: + self.numericScale = iprot.readI32(); + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and self.thrift_spec is not None and fastbinary is not None: + oprot.trans.write(fastbinary.encode_binary(self, (self.__class__, self.thrift_spec))) + return + oprot.writeStructBegin('TTypeQualifiers') + if self.characterMaximumLength is not None: + oprot.writeFieldBegin('characterMaximumLength', TType.I32, 1) + oprot.writeI32(self.characterMaximumLength) + oprot.writeFieldEnd() + if self.numericPrecision is not None: + oprot.writeFieldBegin('numericPrecision', TType.I32, 2) + oprot.writeI32(self.numericPrecision) + oprot.writeFieldEnd() + if self.numericScale is not None: + oprot.writeFieldBegin('numericScale', TType.I32, 3) + oprot.writeI32(self.numericScale) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + return + 
+ + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.iteritems()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + class TPrimitiveTypeEntry: """ Attributes: - type + - typeQualifiers """ thrift_spec = ( None, # 0 (1, TType.I32, 'type', None, None, ), # 1 + (2, TType.STRUCT, 'typeQualifiers', (TTypeQualifiers, TTypeQualifiers.thrift_spec), None, ), # 2 ) - def __init__(self, type=None,): + def __init__(self, type=None, typeQualifiers=None,): self.type = type + self.typeQualifiers = typeQualifiers def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -380,6 +470,12 @@ self.type = iprot.readI32(); else: iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.typeQualifiers = TTypeQualifiers() + self.typeQualifiers.read(iprot) + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -394,6 +490,10 @@ oprot.writeFieldBegin('type', TType.I32, 1) oprot.writeI32(self.type) oprot.writeFieldEnd() + if self.typeQualifiers is not None: + oprot.writeFieldBegin('typeQualifiers', TType.STRUCT, 2) + self.typeQualifiers.write(oprot) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() Index: service/src/gen/thrift/gen-rb/t_c_l_i_service_types.rb =================================================================== --- service/src/gen/thrift/gen-rb/t_c_l_i_service_types.rb +++ service/src/gen/thrift/gen-rb/t_c_l_i_service_types.rb @@ -31,8 +31,9 @@ DECIMAL_TYPE = 15 NULL_TYPE = 16 DATE_TYPE = 17 - VALUE_MAP = {0 => "BOOLEAN_TYPE", 1 => "TINYINT_TYPE", 2 => "SMALLINT_TYPE", 3 => "INT_TYPE", 4 => "BIGINT_TYPE", 5 => "FLOAT_TYPE", 6 => "DOUBLE_TYPE", 7 => "STRING_TYPE", 8 => "TIMESTAMP_TYPE", 9 => "BINARY_TYPE", 10 => "ARRAY_TYPE", 11 => "MAP_TYPE", 12 => "STRUCT_TYPE", 13 => "UNION_TYPE", 14 => "USER_DEFINED_TYPE", 15 => "DECIMAL_TYPE", 16 => "NULL_TYPE", 17 => "DATE_TYPE"} - VALID_VALUES = Set.new([BOOLEAN_TYPE, TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, FLOAT_TYPE, DOUBLE_TYPE, STRING_TYPE, TIMESTAMP_TYPE, BINARY_TYPE, ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE, UNION_TYPE, USER_DEFINED_TYPE, DECIMAL_TYPE, NULL_TYPE, DATE_TYPE]).freeze + VARCHAR_TYPE = 18 + VALUE_MAP = {0 => "BOOLEAN_TYPE", 1 => "TINYINT_TYPE", 2 => "SMALLINT_TYPE", 3 => "INT_TYPE", 4 => "BIGINT_TYPE", 5 => "FLOAT_TYPE", 6 => "DOUBLE_TYPE", 7 => "STRING_TYPE", 8 => "TIMESTAMP_TYPE", 9 => "BINARY_TYPE", 10 => "ARRAY_TYPE", 11 => "MAP_TYPE", 12 => "STRUCT_TYPE", 13 => "UNION_TYPE", 14 => "USER_DEFINED_TYPE", 15 => "DECIMAL_TYPE", 16 => "NULL_TYPE", 17 => "DATE_TYPE", 18 => "VARCHAR_TYPE"} + VALID_VALUES = Set.new([BOOLEAN_TYPE, TINYINT_TYPE, SMALLINT_TYPE, INT_TYPE, BIGINT_TYPE, FLOAT_TYPE, DOUBLE_TYPE, STRING_TYPE, TIMESTAMP_TYPE, BINARY_TYPE, ARRAY_TYPE, MAP_TYPE, STRUCT_TYPE, UNION_TYPE, USER_DEFINED_TYPE, DECIMAL_TYPE, NULL_TYPE, DATE_TYPE, VARCHAR_TYPE]).freeze end module TStatusCode @@ -134,12 +135,34 @@ VALID_VALUES = Set.new([FETCH_NEXT, FETCH_PRIOR, FETCH_RELATIVE, FETCH_ABSOLUTE, FETCH_FIRST, FETCH_LAST]).freeze end +class TTypeQualifiers + include ::Thrift::Struct, ::Thrift::Struct_Union + CHARACTERMAXIMUMLENGTH = 1 + NUMERICPRECISION = 2 + NUMERICSCALE = 3 + + FIELDS = { + CHARACTERMAXIMUMLENGTH => {:type => 
::Thrift::Types::I32, :name => 'characterMaximumLength', :optional => true}, + NUMERICPRECISION => {:type => ::Thrift::Types::I32, :name => 'numericPrecision', :optional => true}, + NUMERICSCALE => {:type => ::Thrift::Types::I32, :name => 'numericScale', :optional => true} + } + + def struct_fields; FIELDS; end + + def validate + end + + ::Thrift::Struct.generate_accessors self +end + class TPrimitiveTypeEntry include ::Thrift::Struct, ::Thrift::Struct_Union TYPE = 1 + TYPEQUALIFIERS = 2 FIELDS = { - TYPE => {:type => ::Thrift::Types::I32, :name => 'type', :enum_class => ::TTypeId} + TYPE => {:type => ::Thrift::Types::I32, :name => 'type', :enum_class => ::TTypeId}, + TYPEQUALIFIERS => {:type => ::Thrift::Types::STRUCT, :name => 'typeQualifiers', :class => ::TTypeQualifiers, :optional => true} } def struct_fields; FIELDS; end Index: service/src/java/org/apache/hive/service/cli/ColumnDescriptor.java =================================================================== --- service/src/java/org/apache/hive/service/cli/ColumnDescriptor.java +++ service/src/java/org/apache/hive/service/cli/ColumnDescriptor.java @@ -50,11 +50,13 @@ public ColumnDescriptor(FieldSchema column, int position) { name = column.getName(); comment = column.getComment(); - type = new TypeDescriptor(column.getType()); + type = new TypeDescriptor(column); this.position = position; } public static ColumnDescriptor newPrimitiveColumnDescriptor(String name, String comment, Type type, int position) { + // Current usage looks like it's only for metadata columns, but if that changes then + // this method may need to take a type qualifiers argument. return new ColumnDescriptor(name, comment, new TypeDescriptor(type), position); } Index: service/src/java/org/apache/hive/service/cli/Type.java =================================================================== --- service/src/java/org/apache/hive/service/cli/Type.java +++ service/src/java/org/apache/hive/service/cli/Type.java @@ -54,6 +54,9 @@ STRING_TYPE("STRING", java.sql.Types.VARCHAR, TTypeId.STRING_TYPE), + VARCHAR_TYPE("VARCHAR", + java.sql.Types.VARCHAR, + TTypeId.VARCHAR_TYPE), DATE_TYPE("DATE", java.sql.Types.DATE, TTypeId.DATE_TYPE), Index: service/src/java/org/apache/hive/service/cli/TypeDescriptor.java =================================================================== --- service/src/java/org/apache/hive/service/cli/TypeDescriptor.java +++ service/src/java/org/apache/hive/service/cli/TypeDescriptor.java @@ -20,6 +20,7 @@ import java.util.List; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hive.service.cli.thrift.TPrimitiveTypeEntry; import org.apache.hive.service.cli.thrift.TTypeDesc; import org.apache.hive.service.cli.thrift.TTypeEntry; @@ -32,6 +33,7 @@ private final Type type; private String typeName = null; + private TypeQualifiers typeQualifiers = null; public TypeDescriptor(Type type) { this.type = type; @@ -41,6 +43,9 @@ List<TTypeEntry> tTypeEntries = tTypeDesc.getTypes(); TPrimitiveTypeEntry top = tTypeEntries.get(0).getPrimitiveEntry(); this.type = Type.getType(top.getType()); + if (top.isSetTypeQualifiers()) { + setTypeQualifiers(TypeQualifiers.fromTTypeQualifiers(top.getTypeQualifiers())); + } } public TypeDescriptor(String typeName) { @@ -50,6 +55,10 @@ } } + public TypeDescriptor(FieldSchema fieldSchema) { + this(fieldSchema.getType()); + } + public Type getType() { return type; } @@ -68,4 +77,12 @@ return type.getName(); } } + + public TypeQualifiers getTypeQualifiers() { + return typeQualifiers; + } + + public void
setTypeQualifiers(TypeQualifiers typeQualifiers) { + this.typeQualifiers = typeQualifiers; + } } Index: service/src/java/org/apache/hive/service/cli/TypeQualifiers.java =================================================================== --- /dev/null +++ service/src/java/org/apache/hive/service/cli/TypeQualifiers.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hive.service.cli; + +import org.apache.hive.service.cli.thrift.TTypeQualifiers; + +public class TypeQualifiers { + private Integer characterMaximumLength; + private Integer numericPrecision; + private Integer numericScale; + + public TypeQualifiers() {} + + public Integer getCharacterMaximumLength() { + return characterMaximumLength; + } + public void setCharacterMaximumLength(int characterMaximumLength) { + this.characterMaximumLength = characterMaximumLength; + } + public Integer getNumericPrecision() { + return numericPrecision; + } + public void setNumericPrecision(Integer numericPrecision) { + this.numericPrecision = numericPrecision; + } + public Integer getNumericScale() { + return numericScale; + } + public void setNumericScale(Integer numericScale) { + this.numericScale = numericScale; + } + + public static TypeQualifiers fromTTypeQualifiers(TTypeQualifiers ttq) { + TypeQualifiers ret = null; + if (ttq != null) { + ret = new TypeQualifiers(); + if (ttq.isSetCharacterMaximumLength()) { + ret.setCharacterMaximumLength(ttq.getCharacterMaximumLength()); + } + if (ttq.isSetNumericPrecision()) { + ret.setNumericPrecision(ttq.getNumericPrecision()); + } + if (ttq.isSetNumericScale()) { + ret.setNumericScale(ttq.getNumericScale()); + } + } + return ret; + } +}
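The pieces above fit together as follows: when returning result-set metadata, the server attaches an optional TTypeQualifiers struct to the column's TPrimitiveTypeEntry, and the client converts it back into a TypeQualifiers object via TypeQualifiers.fromTTypeQualifiers(). Below is a minimal round-trip sketch, not part of the patch: it assumes the no-arg constructor and setType(TTypeId) setter that the Thrift compiler generates for TPrimitiveTypeEntry, and the class name and literal length are illustrative only.

import org.apache.hive.service.cli.TypeQualifiers;
import org.apache.hive.service.cli.thrift.TPrimitiveTypeEntry;
import org.apache.hive.service.cli.thrift.TTypeId;
import org.apache.hive.service.cli.thrift.TTypeQualifiers;

public class TypeQualifiersRoundTrip {
  public static void main(String[] args) {
    // Server side: describe a VARCHAR(25) column. The qualifier struct is
    // optional, so plain STRING columns carry no extra bytes on the wire.
    TTypeQualifiers ttq = new TTypeQualifiers();
    ttq.setCharacterMaximumLength(25);

    TPrimitiveTypeEntry entry = new TPrimitiveTypeEntry();
    entry.setType(TTypeId.VARCHAR_TYPE);
    entry.setTypeQualifiers(ttq);

    // Client side: getTypeQualifiers() returns null while the optional field
    // is unset, and fromTTypeQualifiers() passes a null straight through, so
    // no explicit isSetTypeQualifiers() guard is required here.
    TypeQualifiers tq = TypeQualifiers.fromTTypeQualifiers(entry.getTypeQualifiers());
    if (tq != null && tq.getCharacterMaximumLength() != null) {
      System.out.println("varchar max length: " + tq.getCharacterMaximumLength());
    }
  }
}

Because the qualifiers travel as a single optional struct (field 2 of TPrimitiveTypeEntry) rather than as new required fields, older clients that predate this change simply skip the unknown field during deserialization, which keeps the wire format backward compatible.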