diff --git contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesRecordReader.java contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesRecordReader.java index 8bffa02..8fcb3b3 100644 --- contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesRecordReader.java +++ contrib/src/java/org/apache/hadoop/hive/contrib/util/typedbytes/TypedBytesRecordReader.java @@ -41,6 +41,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -90,8 +91,8 @@ public void initialize(InputStream in, Configuration conf, Properties tbl) throw for (String columnType : columnTypes) { PrimitiveTypeEntry dstTypeEntry = PrimitiveObjectInspectorUtils .getTypeEntryFromTypeName(columnType); - dstOIns.add(PrimitiveObjectInspectorFactory - .getPrimitiveWritableObjectInspector(dstTypeEntry.primitiveCategory)); + dstOIns.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + dstTypeEntry)); } } @@ -154,8 +155,8 @@ public int next(Writable data) throws IOException { PrimitiveTypeEntry srcTypeEntry = PrimitiveObjectInspectorUtils .getTypeEntryFromTypeName(typeName); srcOIns - .add(PrimitiveObjectInspectorFactory - .getPrimitiveWritableObjectInspector(srcTypeEntry.primitiveCategory)); + .add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + srcTypeEntry)); converters.add(ObjectInspectorConverters.getConverter(srcOIns.get(pos), dstOIns.get(pos))); } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index f0505b6..d2265e2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -130,7 +130,7 @@ private void initialize() { List inspectors = new ArrayList(vcCols.size()); for (VirtualColumn vc : vcCols) { inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - vc.getTypeInfo().getPrimitiveCategory())); + vc.getTypeInfo())); names.add(vc.getName()); } vcsOI = ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index 13330b2..dc9c826 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.udf.GenericUDFDecode; import org.apache.hadoop.hive.ql.udf.GenericUDFEncode; +import org.apache.hadoop.hive.ql.udf.SettableUDF; import org.apache.hadoop.hive.ql.udf.UDAFPercentile; import org.apache.hadoop.hive.ql.udf.UDFAbs; import org.apache.hadoop.hive.ql.udf.UDFAcos; @@ -1281,18 +1282,38 @@ public static GenericUDF cloneGenericUDF(GenericUDF genericUDF) { return null; } + GenericUDF clonedUDF = null; if (genericUDF instanceof GenericUDFBridge) { GenericUDFBridge bridge = (GenericUDFBridge) genericUDF; - return new 
GenericUDFBridge(bridge.getUdfName(), bridge.isOperator(), + clonedUDF = new GenericUDFBridge(bridge.getUdfName(), bridge.isOperator(), bridge.getUdfClassName()); } else if (genericUDF instanceof GenericUDFMacro) { GenericUDFMacro bridge = (GenericUDFMacro) genericUDF; - return new GenericUDFMacro(bridge.getMacroName(), bridge.getBody(), + clonedUDF = new GenericUDFMacro(bridge.getMacroName(), bridge.getBody(), bridge.getColNames(), bridge.getColTypes()); + } else { + clonedUDF = (GenericUDF) ReflectionUtils + .newInstance(genericUDF.getClass(), null); + } + + if (clonedUDF != null) { + // The original may have settable info that needs to be added to the new copy. + if (genericUDF instanceof SettableUDF) { + try { + Object settableData = ((SettableUDF)genericUDF).getParams(); + if (settableData != null) { + ((SettableUDF)clonedUDF).setParams(settableData); + } + } catch (UDFArgumentException err) { + // In theory this should not happen - if the original copy of the UDF had this + // data, we should be able to set the UDF copy with this same settableData. + LOG.error("Unable to add settable data to UDF " + genericUDF.getClass()); + throw new IllegalArgumentException(err); + } + } } - return (GenericUDF) ReflectionUtils - .newInstance(genericUDF.getClass(), null); + return clonedUDF; } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java index 18a4b02..2bc7e86 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/VirtualColumn.java @@ -138,7 +138,7 @@ public static StructObjectInspector getVCSObjectInspector(List vc for (VirtualColumn vc : vcs) { names.add(vc.getName()); inspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - vc.getTypeInfo().getPrimitiveCategory())); + vc.getTypeInfo())); } return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java index cd9f693..9e15d27 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseUtils.java @@ -24,11 +24,10 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.ErrorMsg; - -import java.util.Iterator; -import java.util.Map; - -import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** @@ -100,4 +99,37 @@ private ParseUtils() { } return colNames; } + + /** + * @param column column expression to convert + * @param tableFieldTypeInfo TypeInfo to convert to + * @return Expression converting column to the type specified by tableFieldTypeInfo + */ + static ExprNodeDesc createConversionCast(ExprNodeDesc column, PrimitiveTypeInfo tableFieldTypeInfo) + throws SemanticException { + ExprNodeDesc ret; + + // Get base type, since type string may be parameterized + String baseType = TypeInfoUtils.getBaseName(tableFieldTypeInfo.getTypeName()); + BaseTypeParams typeParams = null; + // If TypeInfo is parameterized, provide the params to the UDF factory method. 
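+ // (Illustrative note: for a hypothetical parameterized type such as char(10), the + // params returned below would carry the length; no primitive type in this patch + // actually defines parameters yet.)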
+ typeParams = tableFieldTypeInfo.getTypeParams(); + if (typeParams != null) { + switch (tableFieldTypeInfo.getPrimitiveCategory()) { + // No parameterized types yet + default: + throw new SemanticException("Type cast for " + tableFieldTypeInfo.getPrimitiveCategory() + + " does not take type parameters"); + } + } + + // If the type cast UDF is for a parameterized type, then it should implement + // the SettableUDF interface so that we can pass in the params. + // Not sure if this is the cleanest solution, but there does need to be a way + // to provide the type params to the type cast. + ret = TypeCheckProcFactory.DefaultExprProcessor + .getFuncExprNodeDescWithUdfData(baseType, typeParams, column); + + return ret; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 6d50109..72d7b2c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -68,7 +68,6 @@ import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Task; import org.apache.hadoop.hive.ql.exec.TaskFactory; -import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UnionOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.ReadEntity; @@ -168,6 +167,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -5394,9 +5394,8 @@ Operator genConversionSelectOperator(String dest, QB qb, Operator input, // cannot convert to complex types column = null; } else { - column = TypeCheckProcFactory.DefaultExprProcessor - .getFuncExprNodeDesc(tableFieldTypeInfo.getTypeName(), - column); + column = ParseUtils.createConversionCast( + column, (PrimitiveTypeInfo)tableFieldTypeInfo); } if (column == null) { String reason = "Cannot convert column " + i + " from " @@ -5638,9 +5637,8 @@ private Operator genLimitMapRedPlan(String dest, QB qb, Operator input, // cannot convert to complex types column = null; } else { - column = TypeCheckProcFactory.DefaultExprProcessor - .getFuncExprNodeDesc(tableFieldTypeInfo.getTypeName(), - column); + column = ParseUtils.createConversionCast( + column, (PrimitiveTypeInfo)tableFieldTypeInfo); } if (column == null) { String reason = "Cannot convert column " + posn + " from " @@ -6217,11 +6215,13 @@ private void genJoinOperatorTypeCheck(Operator left, Operator[] right) } // Add implicit type conversion if necessary for (int i = 0; i < right.length; i++) { - if (!commonType.equals(keys.get(i).get(k).getTypeInfo())) { + if (TypeInfoUtils.isConversionRequiredForComparison( + keys.get(i).get(k).getTypeInfo(), + commonType)) { keys.get(i).set( k, - TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc( - commonType.getTypeName(), keys.get(i).get(k))); + ParseUtils.createConversionCast( + keys.get(i).get(k), (PrimitiveTypeInfo)commonType)); } } } @@ -7600,12 +7600,12 @@ private Operator genUnionPlan(String unionalias, String leftalias, * @param unionalias * The alias of the union. 
* @return - * @throws UDFArgumentException + * @throws SemanticException */ private Operator genInputSelectForUnion( Operator origInputOp, Map origInputFieldMap, String origInputAlias, RowResolver unionoutRR, String unionalias) - throws UDFArgumentException { + throws SemanticException { List columns = new ArrayList(); boolean needsCast = false; @@ -7616,8 +7616,8 @@ private Operator genUnionPlan(String unionalias, String leftalias, lInfo.getTabAlias(), lInfo.getIsVirtualCol(), lInfo.isSkewedCol()); if (!lInfo.getType().equals(unionEntry.getValue().getType())) { needsCast = true; - column = TypeCheckProcFactory.DefaultExprProcessor.getFuncExprNodeDesc( - unionEntry.getValue().getType().getTypeName(), column); + column = ParseUtils.createConversionCast( + column, (PrimitiveTypeInfo)unionEntry.getValue().getType()); } columns.add(column); } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index 048824a..a912882 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -55,6 +55,7 @@ import org.apache.hadoop.hive.ql.plan.ExprNodeFieldDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc; +import org.apache.hadoop.hive.ql.udf.SettableUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBaseCompare; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual; @@ -643,7 +644,7 @@ public static String getFunctionText(ASTNode expr, boolean isFunction) { * * @throws UDFArgumentException */ - public static ExprNodeDesc getFuncExprNodeDesc(String udfName, + static ExprNodeDesc getFuncExprNodeDescWithUdfData(String udfName, Object udfData, ExprNodeDesc... children) throws UDFArgumentException { FunctionInfo fi = FunctionRegistry.getFunctionInfo(udfName); @@ -657,11 +658,23 @@ public static ExprNodeDesc getFuncExprNodeDesc(String udfName, + " is an aggregation function or a table function."); } + // Add udfData to UDF if necessary + if (udfData != null) { + if (genericUDF instanceof SettableUDF) { + ((SettableUDF)genericUDF).setParams(udfData); + } + } + List childrenList = new ArrayList(children.length); childrenList.addAll(Arrays.asList(children)); return ExprNodeGenericFuncDesc.newInstance(genericUDF, childrenList); } + public static ExprNodeDesc getFuncExprNodeDesc(String udfName, + ExprNodeDesc... children) throws UDFArgumentException { + return getFuncExprNodeDescWithUdfData(udfName, null, children); + } + static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, boolean isFunction, ArrayList children, TypeCheckCtx ctx) throws SemanticException, UDFArgumentException { @@ -758,11 +771,26 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, } } + // getGenericUDF() actually clones the UDF. Just call it once and reuse. + GenericUDF genericUDF = fi.getGenericUDF(); + if (!fi.isNative()) { ctx.getUnparseTranslator().addIdentifierTranslation( (ASTNode) expr.getChild(0)); } + // Handle type casts that may contain type parameters + if (isFunction) { + ASTNode funcNameNode = (ASTNode)expr.getChild(0); + switch (funcNameNode.getType()) { + // Get type param from AST and add to cast function. 
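+ // (Illustrative note: a future parameterized cast token would read its parameters + // from the AST children of funcNameNode and hand them to the UDF via + // SettableUDF.setParams(), as getFuncExprNodeDescWithUdfData() does above.)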
+ // But, no parameterized types to handle at the moment + default: + // Do nothing + break; + } + } + // Detect UDTF's in nested SELECT, GROUP BY, etc as they aren't // supported if (fi.getGenericUDTF() != null) { @@ -777,8 +805,8 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, throw new SemanticException(ErrorMsg.UDAF_INVALID_LOCATION.getMsg(expr)); } } - if (!ctx.getAllowStatefulFunctions() && (fi.getGenericUDF() != null)) { - if (FunctionRegistry.isStateful(fi.getGenericUDF())) { + if (!ctx.getAllowStatefulFunctions() && (genericUDF != null)) { + if (FunctionRegistry.isStateful(genericUDF)) { throw new SemanticException( ErrorMsg.UDF_STATEFUL_INVALID_LOCATION.getMsg()); } @@ -786,7 +814,7 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, // Try to infer the type of the constant only if there are two // nodes, one of them is column and the other is numeric const - if (fi.getGenericUDF() instanceof GenericUDFBaseCompare + if (genericUDF instanceof GenericUDFBaseCompare && children.size() == 2 && ((children.get(0) instanceof ExprNodeConstantDesc && children.get(1) instanceof ExprNodeColumnDesc) @@ -843,7 +871,7 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, // however, if we already tried this, or the column is NUMBER type and // the operator is EQUAL, return false due to the type mismatch if (triedDouble || - (fi.getGenericUDF() instanceof GenericUDFOPEqual + (genericUDF instanceof GenericUDFOPEqual && !columnType.equals(serdeConstants.STRING_TYPE_NAME))) { return new ExprNodeConstantDesc(false); } @@ -861,7 +889,7 @@ static ExprNodeDesc getXpathOrFuncExprNodeDesc(ASTNode expr, } } - desc = ExprNodeGenericFuncDesc.newInstance(fi.getGenericUDF(), children); + desc = ExprNodeGenericFuncDesc.newInstance(genericUDF, children); } // UDFOPPositive is a no-op. // However, we still create it, and then remove it here, to make sure we diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/SettableUDF.java ql/src/java/org/apache/hadoop/hive/ql/udf/SettableUDF.java new file mode 100644 index 0000000..9225aa1 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/SettableUDF.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.udf; + +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; + +/** + * THIS INTERFACE IS UNSTABLE AND SHOULD NOT BE USED BY 3RD PARTY UDFS. + * Interface to allow passing of parameters to the UDF, before it is initialized. + * For example, to be able to pass the char length parameters to a char type cast. + */ +public interface SettableUDF { + + /** + * Add data to UDF prior to initialization. + * An exception may be thrown if the UDF doesn't know what to do with this data. 
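+ * (For example, a hypothetical char(10) cast UDF could be handed its length here + * before initialize() runs; illustrative note only.)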
+ * @param params UDF-specific data to add to the UDF + */ + void setParams(Object params) throws UDFArgumentException; + + Object getParams(); + +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDe.java index 21cf616..ac81ab8 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDe.java @@ -35,8 +35,10 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; import org.apache.hadoop.hive.serde2.thrift.ConfigurableTProtocol; import org.apache.hadoop.hive.serde2.thrift.TReflectionUtils; +import org.apache.hadoop.hive.serde2.typeinfo.ParameterizedPrimitiveTypeUtils; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; @@ -173,9 +175,9 @@ public static ObjectInspector dynamicSerDeStructBaseToObjectInspector( dynamicSerDeStructBaseToObjectInspector(btMap.getKeyType()), dynamicSerDeStructBaseToObjectInspector(btMap.getValueType())); } else if (bt.isPrimitive()) { - return PrimitiveObjectInspectorFactory - .getPrimitiveJavaObjectInspector(PrimitiveObjectInspectorUtils - .getTypeEntryFromPrimitiveJavaClass(bt.getRealType()).primitiveCategory); + PrimitiveTypeEntry pte = PrimitiveObjectInspectorUtils + .getTypeEntryFromPrimitiveJavaClass(bt.getRealType()); + return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pte); } else { // Must be a struct DynamicSerDeStructBase btStruct = (DynamicSerDeStructBase) bt; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java index a08b4a8..a0ff609 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java @@ -27,18 +27,18 @@ import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector; -import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBooleanObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyByteObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDateObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyFloatObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyLongObjectInspector; import 
org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyShortObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector; -import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDateObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyTimestampObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyVoidObjectInspector; import org.apache.hadoop.hive.serde2.lazydio.LazyDioBoolean; @@ -53,6 +53,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -215,9 +216,9 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, ObjectInspector.Category c = typeInfo.getCategory(); switch (c) { case PRIMITIVE: + BaseTypeParams typeParams = ((PrimitiveTypeInfo)typeInfo).getTypeParams(); return LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector( - ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(), escaped, - escapeChar); + (PrimitiveTypeInfo) typeInfo, escaped, escapeChar); case MAP: return LazyObjectInspectorFactory.getLazySimpleMapObjectInspector( createLazyObjectInspector(((MapTypeInfo) typeInfo) diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java index afbf454..2f09224 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java @@ -21,7 +21,11 @@ import java.util.ArrayList; import java.util.HashMap; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeSpec; /** * LazyPrimitiveObjectInspectorFactory is the primary way to create new @@ -79,9 +83,32 @@ public static LazyStringObjectInspector getLazyStringObjectInspector( return result; } + static PrimitiveObjectInspectorUtils.ParameterizedObjectInspectorMap + cachedParameterizedLazyObjectInspectors = + new PrimitiveObjectInspectorUtils.ParameterizedObjectInspectorMap(); + + public static PrimitiveObjectInspector getParameterizedObjectInspector( + PrimitiveTypeSpec typeSpec) { + PrimitiveCategory primitiveCategory = typeSpec.getPrimitiveCategory(); + BaseTypeParams typeParams = typeSpec.getTypeParams(); + PrimitiveObjectInspector poi = + cachedParameterizedLazyObjectInspectors.getObjectInspector(typeSpec); + if (poi == null) { + // Object inspector hasn't been cached for this type/params yet, create now + switch 
(primitiveCategory) { + // Get type entry for parameterized type, and create new object inspector for type + // Currently no parameterized types + + default: + throw new RuntimeException( + "Primitive type " + primitiveCategory + " should not take parameters"); + } + } + + return poi; + } public static AbstractPrimitiveLazyObjectInspector getLazyObjectInspector( PrimitiveCategory primitiveCategory, boolean escaped, byte escapeChar) { - switch (primitiveCategory) { case BOOLEAN: return LAZY_BOOLEAN_OBJECT_INSPECTOR; @@ -115,6 +142,23 @@ public static LazyStringObjectInspector getLazyStringObjectInspector( } } + public static AbstractPrimitiveLazyObjectInspector getLazyObjectInspector( + PrimitiveTypeSpec typeSpec, boolean escaped, byte escapeChar) { + PrimitiveCategory primitiveCategory = typeSpec.getPrimitiveCategory(); + BaseTypeParams typeParams = typeSpec.getTypeParams(); + + if (typeParams == null) { + return getLazyObjectInspector(primitiveCategory, escaped, escapeChar); + } else { + switch(primitiveCategory) { + // call getParameterizedObjectInspector(). But no parameterized types yet + + default: + throw new RuntimeException("Type " + primitiveCategory + " does not take parameters"); + } + } + } + private LazyPrimitiveObjectInspectorFactory() { // prevent instantiation } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java index 6ad8704..a22c04d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java @@ -425,8 +425,7 @@ public static ObjectInspector getLazyBinaryObjectInspectorFromTypeInfo( switch (typeInfo.getCategory()) { case PRIMITIVE: { result = PrimitiveObjectInspectorFactory - .getPrimitiveWritableObjectInspector(((PrimitiveTypeInfo) typeInfo) - .getPrimitiveCategory()); + .getPrimitiveWritableObjectInspector(((PrimitiveTypeInfo) typeInfo)); break; } case LIST: { diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java index 896193e..36df3cd 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java @@ -172,8 +172,7 @@ public static ObjectInspector getConvertedOI( switch (outputOI.getCategory()) { case PRIMITIVE: PrimitiveObjectInspector primInputOI = (PrimitiveObjectInspector) inputOI; - return PrimitiveObjectInspectorFactory. 
- getPrimitiveWritableObjectInspector(primInputOI.getPrimitiveCategory()); + return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primInputOI); case STRUCT: StructObjectInspector structOutputOI = (StructObjectInspector) outputOI; if (structOutputOI.isSettable()) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index f7418bc..db7028a 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -86,8 +86,7 @@ public static ObjectInspector getWritableObjectInspector(ObjectInspector oi) { if (oi.getCategory() == Category.PRIMITIVE) { PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; if (!(poi instanceof AbstractPrimitiveWritableObjectInspector)) { - return PrimitiveObjectInspectorFactory - .getPrimitiveWritableObjectInspector(poi.getPrimitiveCategory()); + return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(poi); } } return oi; @@ -111,22 +110,20 @@ public static ObjectInspector getStandardObjectInspector(ObjectInspector oi, switch (objectInspectorOption) { case DEFAULT: { if (poi.preferWritable()) { - result = PrimitiveObjectInspectorFactory - .getPrimitiveWritableObjectInspector(poi.getPrimitiveCategory()); + result = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(poi); } else { result = PrimitiveObjectInspectorFactory - .getPrimitiveJavaObjectInspector(poi.getPrimitiveCategory()); + .getPrimitiveJavaObjectInspector(poi); } break; } case JAVA: { result = PrimitiveObjectInspectorFactory - .getPrimitiveJavaObjectInspector(poi.getPrimitiveCategory()); + .getPrimitiveJavaObjectInspector(poi); break; } case WRITABLE: { - result = PrimitiveObjectInspectorFactory - .getPrimitiveWritableObjectInspector(poi.getPrimitiveCategory()); + result = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(poi); break; } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java index f1b8bdf..353a99c 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java @@ -17,11 +17,14 @@ */ package org.apache.hadoop.hive.serde2.objectinspector; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeSpec; + /** * PrimitiveObjectInspector. * */ -public interface PrimitiveObjectInspector extends ObjectInspector { +public interface PrimitiveObjectInspector extends ObjectInspector, PrimitiveTypeSpec { /** * The primitive types supported by Hive. @@ -74,4 +77,17 @@ * most efficient way to getting data out of the Object. */ boolean preferWritable(); + + /** + * If the type has type parameters (such as varchar length, or decimal precision/scale), + * then return the parameters for the type. + * @return A BaseTypeParams object representing the parameters for the type, or null + */ + BaseTypeParams getTypeParams(); + + /** + * Set the type parameters for the type. 
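+ * Implementations may reject parameters for a type that is not parameterized; + * see AbstractPrimitiveObjectInspector.setTypeParams().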
+ * @param newParams type parameters for the type + */ + void setTypeParams(BaseTypeParams newParams); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/AbstractPrimitiveObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/AbstractPrimitiveObjectInspector.java index 8620af3..e42d7c6 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/AbstractPrimitiveObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/AbstractPrimitiveObjectInspector.java @@ -19,6 +19,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; /** * An AbstractPrimitiveObjectInspector is based on @@ -28,6 +30,7 @@ PrimitiveObjectInspector { transient PrimitiveTypeEntry typeEntry; + protected BaseTypeParams typeParams; /** * Construct a AbstractPrimitiveObjectInspector. @@ -76,7 +79,18 @@ public Category getCategory() { */ @Override public String getTypeName() { - return typeEntry.typeName; + return typeEntry.toString(); } + public BaseTypeParams getTypeParams() { + return typeParams; + } + + public void setTypeParams(BaseTypeParams newParams) { + if (typeParams != null && !typeEntry.isParameterized()) { + throw new UnsupportedOperationException( + "Attempting to add type parameters " + typeParams + " to type " + getTypeName()); + } + this.typeParams = newParams; + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java index 281f730..12e06dd 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java @@ -20,16 +20,19 @@ import java.util.HashMap; -import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; +import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.ParameterizedObjectInspectorMap; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeSpec; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -166,6 +169,20 @@ } /** + * Cached Writable object inspectors for parameterized primitive types. 
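+ * (Entries are keyed by primitive category and the string form of the type + * parameters; see ParameterizedObjectInspectorMap in PrimitiveObjectInspectorUtils.)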
+ */ + private static ParameterizedObjectInspectorMap + cachedParameterizedPrimitiveWritableObjectInspectorCache = + new ParameterizedObjectInspectorMap(); + + /** + * Cached Java object inspectors for parameterized primitive types. + */ + private static ParameterizedObjectInspectorMap + cachedParameterizedPrimitiveJavaObjectInspectorCache = + new ParameterizedObjectInspectorMap(); + + /** * Returns the PrimitiveWritableObjectInspector for the PrimitiveCategory. * * @param primitiveCategory @@ -182,6 +199,47 @@ public static AbstractPrimitiveWritableObjectInspector getPrimitiveWritableObjec } /** + * Returns the PrimitiveWritableObjectInspector for the PrimitiveCategory, with the option to + * pass in parameters for the primitive type (such as char(10)). + * Ideally this method should be used over the method without type parameters, + * and the type parameters (or lack of parameters) can be determined from + * the input ObjectInspector, TypeInfo, or TypeEntry. + * However there are situations where it is not possible to get any information about + * type parameters, such as when getting an object inspector based on reflection from + * the java or primitive class. + * @param typeSpec Primitive category plus any type parameters for the type. + * Type parameters are null if there are none. + * @return + */ + public static AbstractPrimitiveWritableObjectInspector getPrimitiveWritableObjectInspector( + PrimitiveTypeSpec typeSpec) { + PrimitiveCategory primitiveCategory = typeSpec.getPrimitiveCategory(); + BaseTypeParams primitiveTypeParams = typeSpec.getTypeParams(); + + if (primitiveTypeParams == null) { + // No type params, just search the unparameterized types + return getPrimitiveWritableObjectInspector(primitiveCategory); + } else { + // Check our cached set of parameterized object inspectors for the primitive category, + // or create a new object inspector if one doesn't exist yet. + PrimitiveObjectInspector oi = + cachedParameterizedPrimitiveWritableObjectInspectorCache.getObjectInspector( + typeSpec); + if (oi == null) { + // Do a bit of validation - not all primitive types use parameters. + switch (primitiveCategory) { + // Currently no parameterized types + default: + throw new RuntimeException( + "Primitive type " + primitiveCategory + " should not take parameters"); + } + } + return (AbstractPrimitiveWritableObjectInspector)oi; + } + } + + /** * Returns a PrimitiveWritableObjectInspector which implements ConstantObjectInspector * for the PrimitiveCategory. * @@ -240,6 +298,47 @@ public static AbstractPrimitiveJavaObjectInspector getPrimitiveJavaObjectInspect } /** + * Returns the PrimitiveJavaObjectInspector for the PrimitiveCategory, with the option to + * pass in parameters for the primitive type (such as char(10)). + * Ideally this method should be used over the method without type parameters, + * and the type parameters (or lack of parameters) can be determined from + * the input ObjectInspector, TypeInfo, or TypeEntry. + * However there are situations where it is not possible to get any information about + * type parameters, such as when getting an object inspector based on reflection from + * the java or primitive class. + * @param typeSpec Primitive category plus any type parameters for the type. 
+ * Type parameters are null if there are none. + * @return + */ + public static AbstractPrimitiveJavaObjectInspector getPrimitiveJavaObjectInspector( + PrimitiveTypeSpec typeSpec) { + PrimitiveCategory primitiveCategory = typeSpec.getPrimitiveCategory(); + BaseTypeParams primitiveTypeParams = typeSpec.getTypeParams(); + + if (primitiveTypeParams == null) { + // No type params, just search the unparameterized types + return getPrimitiveJavaObjectInspector(primitiveCategory); + } else { + // Check our cached set of parameterized object inspectors for the primitive category, + // or create a new object inspector if one doesn't exist yet. + PrimitiveObjectInspector oi = + cachedParameterizedPrimitiveJavaObjectInspectorCache.getObjectInspector( + typeSpec); + if (oi == null) { + // Do a bit of validation - not all primitive types use parameters. + switch (primitiveCategory) { + // Create type info and add to cache + // Currently no existing parameterized types + default: + throw new RuntimeException( + "Primitive type " + primitiveCategory + " should not take parameters"); + } + } + return (AbstractPrimitiveJavaObjectInspector)oi; + } + } + /** * Returns an ObjectInspector for a primitive Class. The Class can be a Hive * Writable class, or a Java Primitive Class. * diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java index 7b8f947..80c0d86 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java @@ -26,8 +26,11 @@ import java.util.HashMap; import java.util.Map; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; @@ -40,6 +43,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.typeinfo.BaseTypeParams; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeSpec; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -58,11 +63,12 @@ * ObjectInspector to return to the caller of SerDe2.getObjectInspector(). */ public final class PrimitiveObjectInspectorUtils { + private static Log LOG = LogFactory.getLog(PrimitiveObjectInspectorUtils.class); /** * TypeEntry stores information about a Hive Primitive TypeInfo. */ - public static class PrimitiveTypeEntry implements Writable { + public static class PrimitiveTypeEntry implements Writable, Cloneable, PrimitiveTypeSpec { /** * The category of the PrimitiveType. @@ -87,15 +93,18 @@ * typeName is the name of the type as in DDL. 
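+ * (For a parameterized type, toString() below appends the parameter suffix to + * this base name, e.g. a hypothetical char(10); illustrative note.)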
*/ public String typeName; + public Class typeParamsClass; + public BaseTypeParams typeParams; PrimitiveTypeEntry( PrimitiveObjectInspector.PrimitiveCategory primitiveCategory, String typeName, Class primitiveType, Class javaClass, - Class hiveClass) { + Class hiveClass, Class paramsClass) { this.primitiveCategory = primitiveCategory; primitiveJavaType = primitiveType; primitiveJavaClass = javaClass; primitiveWritableClass = hiveClass; + typeParamsClass = paramsClass; this.typeName = typeName; } @@ -104,22 +113,117 @@ public void readFields(DataInput in) throws IOException { primitiveCategory = WritableUtils.readEnum(in, PrimitiveObjectInspector.PrimitiveCategory.class); typeName = WritableUtils.readString(in); + int typeParamsIndicator = WritableUtils.readVInt(in); try { primitiveJavaType = Class.forName(WritableUtils.readString(in)); primitiveJavaClass = Class.forName(WritableUtils.readString(in)); primitiveWritableClass = Class.forName(WritableUtils.readString(in)); + if (typeParamsIndicator == 1) { + typeParamsClass = Class.forName(WritableUtils.readString(in)); + typeParams = (BaseTypeParams)typeParamsClass.newInstance(); + typeParams.readFields(in); + } else { + typeParamsClass = null; + typeParams = null; + } } catch (ClassNotFoundException e) { throw new IOException(e); + } catch (IllegalAccessException e) { + throw new IOException(e); + } catch (InstantiationException e) { + throw new IOException(e); } } @Override public void write(DataOutput out) throws IOException { + int typeParamsIndicator = (isParameterized() && typeParams != null) ? 1 : 0; + WritableUtils.writeEnum(out, primitiveCategory); WritableUtils.writeString(out, typeName); + WritableUtils.writeVInt(out, typeParamsIndicator); WritableUtils.writeString(out, primitiveJavaType.getName()); WritableUtils.writeString(out, primitiveJavaClass.getName()); WritableUtils.writeString(out, primitiveWritableClass.getName()); + if (typeParamsIndicator == 1) { + WritableUtils.writeString(out, typeParamsClass.getName()); + typeParams.write(out); + } + } + + public PrimitiveTypeEntry addParameters(String[] parameters) { + if (parameters == null || parameters.length == 0) { + return this; + } + + PrimitiveTypeEntry result; + try { + BaseTypeParams newTypeParams = (BaseTypeParams)typeParamsClass.newInstance(); + newTypeParams.set(parameters); + String typeNameWithParams = this.typeName + newTypeParams.toString(); + if (typeNameToTypeEntry.containsKey(typeNameWithParams)) { + return typeNameToTypeEntry.get(typeNameWithParams); + } + result = (PrimitiveTypeEntry)this.clone(); + result.typeParams = newTypeParams; + + PrimitiveObjectInspectorUtils.addParameterizedType(result); + + return result; + } catch (Exception err) { + LOG.error("Error while setting type parameters: " + err); + return null; + } + } + + public boolean isParameterized() { + return (null != typeParamsClass); + } + + @Override + public Object clone() { + PrimitiveTypeEntry result = new PrimitiveTypeEntry( + this.primitiveCategory, + this.typeName, + this.primitiveJavaType, + this.primitiveJavaClass, + this.primitiveWritableClass, + this.typeParamsClass); + return result; + } + + @Override + public String toString() { + if (typeParams != null) { + return typeName + typeParams.toString(); + } + return typeName; + } + + public static BaseTypeParams createTypeParams(String typeName, String[] parameters) + throws SerDeException { + try { + PrimitiveTypeEntry typeEntry = getTypeEntryFromTypeName(typeName); + if (typeEntry != null && typeEntry.typeParamsClass != null) { 
BaseTypeParams newTypeParams = (BaseTypeParams)typeEntry.typeParamsClass.newInstance(); + newTypeParams.set(parameters); + return newTypeParams; + } else { + return null; + } + } catch (Exception err) { + throw new SerDeException("Error creating type params for " + typeName, err); + } + } + + @Override + public PrimitiveCategory getPrimitiveCategory() { + return primitiveCategory; + } + + @Override + public BaseTypeParams getTypeParams() { + return typeParams; } } @@ -129,6 +233,10 @@ public void write(DataOutput out) throws IOException { static final Map<Class<?>, PrimitiveTypeEntry> primitiveWritableClassToTypeEntry = new HashMap<Class<?>, PrimitiveTypeEntry>(); static final Map<String, PrimitiveTypeEntry> typeNameToTypeEntry = new HashMap<String, PrimitiveTypeEntry>(); + static void addParameterizedType(PrimitiveTypeEntry t) { + typeNameToTypeEntry.put(t.toString(), t); + } + static void registerType(PrimitiveTypeEntry t) { if (t.primitiveCategory != PrimitiveCategory.UNKNOWN) { primitiveCategoryToTypeEntry.put(t.primitiveCategory, t); @@ -149,49 +257,49 @@ static void registerType(PrimitiveTypeEntry t) { public static final PrimitiveTypeEntry binaryTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.BINARY, serdeConstants.BINARY_TYPE_NAME, byte[].class, - byte[].class, BytesWritable.class); + byte[].class, BytesWritable.class, null); public static final PrimitiveTypeEntry stringTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.STRING, serdeConstants.STRING_TYPE_NAME, null, String.class, - Text.class); + Text.class, null); public static final PrimitiveTypeEntry booleanTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.BOOLEAN, serdeConstants.BOOLEAN_TYPE_NAME, Boolean.TYPE, - Boolean.class, BooleanWritable.class); + Boolean.class, BooleanWritable.class, null); public static final PrimitiveTypeEntry intTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.INT, serdeConstants.INT_TYPE_NAME, Integer.TYPE, - Integer.class, IntWritable.class); + Integer.class, IntWritable.class, null); public static final PrimitiveTypeEntry longTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.LONG, serdeConstants.BIGINT_TYPE_NAME, Long.TYPE, - Long.class, LongWritable.class); + Long.class, LongWritable.class, null); public static final PrimitiveTypeEntry floatTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.FLOAT, serdeConstants.FLOAT_TYPE_NAME, Float.TYPE, - Float.class, FloatWritable.class); + Float.class, FloatWritable.class, null); public static final PrimitiveTypeEntry voidTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.VOID, serdeConstants.VOID_TYPE_NAME, Void.TYPE, Void.class, - NullWritable.class); + NullWritable.class, null); // No corresponding Writable classes for the following 3 in hadoop 0.17.0 public static final PrimitiveTypeEntry doubleTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.DOUBLE, serdeConstants.DOUBLE_TYPE_NAME, Double.TYPE, - Double.class, DoubleWritable.class); + Double.class, DoubleWritable.class, null); public static final PrimitiveTypeEntry byteTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.BYTE, serdeConstants.TINYINT_TYPE_NAME, Byte.TYPE, - Byte.class, ByteWritable.class); + Byte.class, ByteWritable.class, null); public static final PrimitiveTypeEntry shortTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.SHORT, serdeConstants.SMALLINT_TYPE_NAME, Short.TYPE, - Short.class, ShortWritable.class); + Short.class, ShortWritable.class, null); public static final PrimitiveTypeEntry dateTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.DATE, serdeConstants.DATE_TYPE_NAME, null, - Date.class, 
DateWritable.class); + Date.class, DateWritable.class, null); public static final PrimitiveTypeEntry timestampTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.TIMESTAMP, serdeConstants.TIMESTAMP_TYPE_NAME, null, - Timestamp.class, TimestampWritable.class); + Timestamp.class, TimestampWritable.class, null); public static final PrimitiveTypeEntry decimalTypeEntry = new PrimitiveTypeEntry( PrimitiveCategory.DECIMAL, serdeConstants.DECIMAL_TYPE_NAME, null, - HiveDecimal.class, HiveDecimalWritable.class); + HiveDecimal.class, HiveDecimalWritable.class, null); // The following is a complex type for special handling public static final PrimitiveTypeEntry unknownTypeEntry = new PrimitiveTypeEntry( - PrimitiveCategory.UNKNOWN, "unknown", null, Object.class, null); + PrimitiveCategory.UNKNOWN, "unknown", null, Object.class, null, null); static { registerType(binaryTypeEntry); @@ -317,6 +425,23 @@ public static PrimitiveTypeEntry getTypeEntryFromTypeName(String typeName) { return typeNameToTypeEntry.get(typeName); } + public static PrimitiveTypeEntry getTypeEntryFromTypeSpecs( + PrimitiveCategory primitiveCategory, + BaseTypeParams typeParams) { + String typeString = primitiveCategory.toString().toLowerCase(); + if (typeParams != null) { + typeString += typeParams.toString(); + } + PrimitiveTypeEntry typeEntry = getTypeEntryFromTypeName(typeString); + if (typeEntry == null) { + // Parameterized type doesn't exist yet, create now. + typeEntry = (PrimitiveTypeEntry)getTypeEntryFromTypeSpecs(primitiveCategory, null).clone(); + typeEntry.typeParams = typeParams; + addParameterizedType(typeEntry); + } + return typeEntry; + } + /** * Compare 2 primitive objects. Conversion not allowed. Note that NULL does * not equal to NULL according to SQL standard. @@ -913,20 +1038,8 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi) { break; case STRING: StringObjectInspector soi = (StringObjectInspector) oi; - String s = soi.getPrimitiveJavaObject(o).trim(); - - // Throw away extra if more than 9 decimal places - int periodIdx = s.indexOf("."); - if (periodIdx != -1) { - if (s.length() - periodIdx > 9) { - s = s.substring(0, periodIdx + 10); - } - } - try { - result = Timestamp.valueOf(s); - } catch (IllegalArgumentException e) { - result = null; - } + String s = soi.getPrimitiveJavaObject(o); + result = getTimestampFromString(s); break; case DATE: result = new Timestamp( @@ -942,6 +1055,25 @@ public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi) { return result; } + static Timestamp getTimestampFromString(String s) { + Timestamp result; + s = s.trim(); + + // Throw away extra if more than 9 decimal places + int periodIdx = s.indexOf("."); + if (periodIdx != -1) { + if (s.length() - periodIdx > 9) { + s = s.substring(0, periodIdx + 10); + } + } + try { + result = Timestamp.valueOf(s); + } catch (IllegalArgumentException e) { + result = null; + } + return result; + } + public static Class getJavaPrimitiveClassFromObjectInspector(ObjectInspector oi) { if (oi.getCategory() != Category.PRIMITIVE) { return null; @@ -994,4 +1126,37 @@ private PrimitiveObjectInspectorUtils() { // prevent instantiation } + /** + * Helper class to store parameterized primitive object inspectors, which can be + * used by the various object inspector factory methods. 
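+ * (Lookups are keyed by primitive category and then by the toString() form of + * the type parameters, so each category/params pair caches one inspector.)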
+ */ + public static class ParameterizedObjectInspectorMap { + HashMap<PrimitiveCategory, HashMap<String, PrimitiveObjectInspector>> entries; + + public ParameterizedObjectInspectorMap() { + entries = + new HashMap<PrimitiveCategory, HashMap<String, PrimitiveObjectInspector>>(); + } + + public PrimitiveObjectInspector getObjectInspector( + PrimitiveTypeSpec typeSpec) { + PrimitiveCategory category = typeSpec.getPrimitiveCategory(); + BaseTypeParams params = typeSpec.getTypeParams(); + HashMap<String, PrimitiveObjectInspector> entriesForCategory = entries.get(category); + if (entriesForCategory == null) { + return null; + } + return (PrimitiveObjectInspector)entriesForCategory.get(params.toString()); + } + + public void setObjectInspector(PrimitiveObjectInspector oi) { + PrimitiveCategory category = oi.getPrimitiveCategory(); + HashMap<String, PrimitiveObjectInspector> entriesForCategory = entries.get(category); + if (entriesForCategory == null) { + entriesForCategory = new HashMap<String, PrimitiveObjectInspector>(); + entries.put(category, entriesForCategory); + } + entriesForCategory.put(oi.getTypeParams().toString(), oi); + } + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/BaseTypeParams.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/BaseTypeParams.java new file mode 100644 index 0000000..1eba561 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/BaseTypeParams.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.typeinfo; + +import java.io.Serializable; + +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.io.Writable; + +/** + * Base type for type-specific params, such as char(10) or decimal(10, 2). + */ +public abstract class BaseTypeParams implements Writable, Serializable { + + private static final long serialVersionUID = 1L; + + public abstract void validateParams() throws SerDeException; + + public abstract void populateParams(String[] params) throws SerDeException; + + public abstract String toString(); + + public void set(String[] params) throws SerDeException { + populateParams(params); + validateParams(); + } + + // Needed for conversion to/from TypeQualifiers. Override in subclasses. 
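+ // (Illustrative: a char-like subclass would override these to report its maximum length.)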
+ public boolean hasCharacterMaximumLength() { + return false; + } + public int getCharacterMaximumLength() { + return -1; + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/ParameterizedPrimitiveTypeUtils.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/ParameterizedPrimitiveTypeUtils.java new file mode 100644 index 0000000..9dcf4cc --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/ParameterizedPrimitiveTypeUtils.java @@ -0,0 +1,43 @@ +package org.apache.hadoop.hive.serde2.typeinfo; + +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +public class ParameterizedPrimitiveTypeUtils { + + public static BaseTypeParams getTypeParamsFromTypeInfo(TypeInfo typeInfo) { + BaseTypeParams typeParams = null; + if (typeInfo instanceof PrimitiveTypeInfo) { + PrimitiveTypeInfo ppti = (PrimitiveTypeInfo)typeInfo; + typeParams = ppti.getTypeParams(); + } + return typeParams; + } + + public static BaseTypeParams getTypeParamsFromPrimitiveTypeEntry(PrimitiveTypeEntry typeEntry) { + return typeEntry.typeParams; + } + + public static BaseTypeParams getTypeParamsFromPrimitiveObjectInspector( + PrimitiveObjectInspector oi) { + return oi.getTypeParams(); + } + +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeInfo.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeInfo.java index 46d3f3d..40785cd 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeInfo.java +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeInfo.java @@ -23,19 +23,21 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; /** * There are limited number of Primitive Types. All Primitive Types are defined * by TypeInfoFactory.isPrimitiveClass(). - * + * * Always use the TypeInfoFactory to create new TypeInfo objects, instead of * directly creating an instance of this class. */ -public final class PrimitiveTypeInfo extends TypeInfo implements Serializable { +public class PrimitiveTypeInfo extends TypeInfo implements Serializable, PrimitiveTypeSpec { private static final long serialVersionUID = 1L; - private String typeName; + protected String typeName; + protected BaseTypeParams typeParams; /** * For java serialization use only. 
diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeInfo.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeInfo.java
index 46d3f3d..40785cd 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeInfo.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeInfo.java
@@ -23,19 +23,21 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
 
 /**
  * There are limited number of Primitive Types. All Primitive Types are defined
  * by TypeInfoFactory.isPrimitiveClass().
- * 
+ *
  * Always use the TypeInfoFactory to create new TypeInfo objects, instead of
  * directly creating an instance of this class.
  */
-public final class PrimitiveTypeInfo extends TypeInfo implements Serializable {
+public class PrimitiveTypeInfo extends TypeInfo implements Serializable, PrimitiveTypeSpec {
 
   private static final long serialVersionUID = 1L;
 
-  private String typeName;
+  protected String typeName;
+  protected BaseTypeParams typeParams;
 
   /**
    * For java serialization use only.
@@ -59,7 +61,7 @@ public Category getCategory() {
   }
 
   public PrimitiveCategory getPrimitiveCategory() {
-    return PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(typeName).primitiveCategory;
+    return getPrimitiveTypeEntry().primitiveCategory;
   }
 
   public Class<?> getPrimitiveWritableClass() {
@@ -81,6 +83,36 @@ public String getTypeName() {
   }
 
   /**
+   * If the type has type parameters (such as varchar length, or decimal precision/scale),
+   * then return the parameters for the type.
+   * @return A BaseTypeParams object representing the parameters for the type, or null
+   */
+  public BaseTypeParams getTypeParams() {
+    return typeParams;
+  }
+
+  /**
+   * Set the type parameters for the type.
+   * @param typeParams type parameters for the type
+   */
+  public void setTypeParams(BaseTypeParams typeParams) {
+    // Ideally could check here to make sure the type really supports parameters,
+    // however during deserialization some of the required fields are not set at the
+    // time that the type params are set. We would have to customize the way this class
+    // is serialized/deserialized for the check to work.
+    //if (typeParams != null && !getPrimitiveTypeEntry().isParameterized()) {
+    //  throw new UnsupportedOperationException(
+    //      "Attempting to add type parameters " + typeParams + " to type " + getTypeName());
+    //}
+    this.typeParams = typeParams;
+  }
+
+  public PrimitiveTypeEntry getPrimitiveTypeEntry() {
+    return PrimitiveObjectInspectorUtils.getTypeEntryFromTypeName(
+        TypeInfoUtils.getBaseName(typeName));
+  }
+
+  /**
    * Compare if 2 TypeInfos are the same. We use TypeInfoFactory to cache
    * TypeInfos, so we only need to compare the Object pointer.
    */
@@ -97,4 +129,8 @@ public int hashCode() {
     return typeName.hashCode();
   }
 
+  @Override
+  public String toString() {
+    return typeName;
+  }
 }
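The key split here is that the TypeInfo keeps the full parameterized name while category lookups strip down to the base name. Assuming a parameterized type such as varchar(10) can actually be constructed (its registration is not part of these hunks), the intended behavior would be roughly:

import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class TypeNameSplitSketch {
  public static void main(String[] args) {
    // Assumption: "varchar" is registered as a parameterized primitive type.
    PrimitiveTypeInfo ti =
        (PrimitiveTypeInfo) TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)");
    System.out.println(ti.getTypeName());           // "varchar(10)" - parameters kept
    System.out.println(ti.getPrimitiveCategory());  // resolved via base name "varchar"
    System.out.println(ti.getTypeParams() != null); // true - params carried alongside
  }
}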
diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeSpec.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeSpec.java
new file mode 100644
index 0000000..6d8af3c
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeSpec.java
@@ -0,0 +1,37 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.typeinfo;
+
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+
+/**
+ * Interface to encapsulate the retrieval of type information (primitive category
+ * plus any type parameters) for the object inspector factory.
+ */
+public interface PrimitiveTypeSpec {
+  /**
+   * @return PrimitiveCategory referred to by the PrimitiveTypeSpec
+   */
+  PrimitiveCategory getPrimitiveCategory();
+
+  /**
+   * @return Type params referred to by the PrimitiveTypeSpec
+   */
+  BaseTypeParams getTypeParams();
+}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
index 542af09..48fbeae 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
@@ -22,8 +22,11 @@
 import java.util.List;
 import java.util.concurrent.ConcurrentHashMap;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
 
 /**
  * TypeInfoFactory can be used to create the TypeInfo object for any types.
@@ -33,7 +36,7 @@
  * objects that represents the same type.
  */
 public final class TypeInfoFactory {
-
+  private static Log LOG = LogFactory.getLog(TypeInfoFactory.class);
   static ConcurrentHashMap<String, TypeInfo> cachedPrimitiveTypeInfo =
       new ConcurrentHashMap<String, TypeInfo>();
 
   private TypeInfoFactory() {
@@ -41,13 +44,32 @@ private TypeInfoFactory() {
   }
 
   public static TypeInfo getPrimitiveTypeInfo(String typeName) {
-    if (null == PrimitiveObjectInspectorUtils
-        .getTypeEntryFromTypeName(typeName)) {
+    PrimitiveTypeEntry typeEntry = PrimitiveObjectInspectorUtils
+        .getTypeEntryFromTypeName(TypeInfoUtils.getBaseName(typeName));
+    if (null == typeEntry) {
       throw new RuntimeException("Cannot getPrimitiveTypeInfo for " + typeName);
     }
     TypeInfo result = cachedPrimitiveTypeInfo.get(typeName);
     if (result == null) {
-      result = new PrimitiveTypeInfo(typeName);
+      TypeInfoUtils.PrimitiveParts parts = TypeInfoUtils.parsePrimitiveParts(typeName);
+      // Create params if there are any
+      if (parts.typeParams != null && parts.typeParams.length > 0) {
+        // The type string came with parameters. Parse and add to TypeInfo
+        try {
+          BaseTypeParams typeParams = PrimitiveTypeEntry.createTypeParams(
+              parts.typeName, parts.typeParams);
+          result = new PrimitiveTypeInfo(typeName);
+          ((PrimitiveTypeInfo) result).setTypeParams(typeParams);
+        } catch (Exception err) {
+          LOG.error(err);
+          throw new RuntimeException("Error creating type parameters for " + typeName
+              + ": " + err, err);
+        }
+      } else {
+        // No type params
+        result = new PrimitiveTypeInfo(parts.typeName);
+      }
       cachedPrimitiveTypeInfo.put(typeName, result);
     }
     return result;
@@ -66,6 +88,8 @@ public static TypeInfo getPrimitiveTypeInfo(String typeName) {
   public static final TypeInfo timestampTypeInfo = getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME);
   public static final TypeInfo binaryTypeInfo = getPrimitiveTypeInfo(serdeConstants.BINARY_TYPE_NAME);
   public static final TypeInfo decimalTypeInfo = getPrimitiveTypeInfo(serdeConstants.DECIMAL_TYPE_NAME);
+  // Disallow usage of varchar without a length specifier.
+  //public static final TypeInfo varcharTypeInfo = getPrimitiveTypeInfo(serdeConstants.VARCHAR_TYPE_NAME);
 
   public static final TypeInfo unknownTypeInfo = getPrimitiveTypeInfo("unknown");
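One consequence of keying the cache on the full type string, while validating only the base name, is that each distinct parameterization gets its own cached TypeInfo. A sketch, again under the assumption that a parameterized type like varchar is registered:

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class FactoryCacheSketch {
  public static void main(String[] args) {
    // Assumption: varchar is registered and accepts a length parameter.
    TypeInfo a = TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)");
    TypeInfo b = TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)");
    TypeInfo c = TypeInfoFactory.getPrimitiveTypeInfo("varchar(20)");
    System.out.println(a == b); // true  - identical strings share one instance
    System.out.println(a == c); // false - each parameterization is cached separately
  }
}

That pointer identity is what keeps the unchanged equals() implementation (object-pointer comparison) valid for parameterized types, provided callers always go through the factory.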
diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
index 84e0b2e..3d1c50f 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java
@@ -22,23 +22,25 @@
 import java.lang.reflect.Method;
 import java.lang.reflect.ParameterizedType;
 import java.lang.reflect.Type;
-import java.util.concurrent.ConcurrentHashMap;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
@@ -56,7 +58,7 @@ private TypeInfoUtils() {
   /**
    * Return the extended TypeInfo from a Java type. By extended TypeInfo, we
    * allow unknownType for java.lang.Object.
-   * 
+   *
    * @param t
    *          The Java type.
    * @param m
@@ -148,7 +150,7 @@ public static Type getArrayElementType(Type t) {
 
   /**
    * Get the parameter TypeInfo for a method.
-   * 
+   *
    * @param size
    *          In case the last parameter of Method is an array, we will try to
    *          return a List with the specified size by repeating the
@@ -194,12 +196,46 @@ public static Type getArrayElementType(Type t) {
     return typeInfos;
   }
 
+  public static boolean hasParameters(String typeName) {
+    int idx = typeName.indexOf('(');
+    if (idx == -1) {
+      return false;
+    } else {
+      return true;
+    }
+  }
+
+  public static String getBaseName(String typeName) {
+    int idx = typeName.indexOf('(');
+    if (idx == -1) {
+      return typeName;
+    } else {
+      return typeName.substring(0, idx);
+    }
+  }
+
+  /**
+   * Returns true if both TypeInfos are of primitive type, and the primitive category matches.
+   * @param ti1
+   * @param ti2
+   * @return true if both are primitive and their primitive categories match
+   */
+  public static boolean doPrimitiveCategoriesMatch(TypeInfo ti1, TypeInfo ti2) {
+    if (ti1.getCategory() == Category.PRIMITIVE && ti2.getCategory() == Category.PRIMITIVE) {
+      if (((PrimitiveTypeInfo) ti1).getPrimitiveCategory()
+          == ((PrimitiveTypeInfo) ti2).getPrimitiveCategory()) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   /**
    * Parse a recursive TypeInfo list String. For example, the following inputs
    * are valid inputs:
    * "int,string,map<string,string>,list<map<int,list<string>>>,list<string>"
    * The separators between TypeInfos can be ",", ":", or ";".
-   * 
+   *
    * In order to use this class: TypeInfoParser parser = new
    * TypeInfoParser("int,string"); ArrayList<TypeInfo> typeInfos =
    * parser.parseTypeInfos();
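The two string helpers split only on the first '(' and do no validation of what follows; a quick illustration of their behavior:

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class BaseNameSketch {
  public static void main(String[] args) {
    System.out.println(TypeInfoUtils.getBaseName("varchar(10)"));     // "varchar"
    System.out.println(TypeInfoUtils.getBaseName("int"));             // "int"
    System.out.println(TypeInfoUtils.hasParameters("decimal(10,2)")); // true
    System.out.println(TypeInfoUtils.hasParameters("int"));           // false
  }
}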
@@ -225,7 +261,7 @@ private static boolean isTypeChar(char c) {
    * Tokenize the typeInfoString. The rule is simple: all consecutive
    * alphadigits and '_', '.' are in one token, and all other characters are
    * one character per token.
-   * 
+   *
    * tokenize("map<int,string>") should return
    * ["map","<","int",",","string",">"]
    */
@@ -281,6 +317,14 @@ public TypeInfoParser(String typeInfoString) {
       return typeInfos;
     }
 
+    private Token peek() {
+      if (iToken < typeInfoTokens.size()) {
+        return typeInfoTokens.get(iToken);
+      } else {
+        return null;
+      }
+    }
+
     private Token expect(String item) {
       return expect(item, null);
     }
@@ -320,6 +364,27 @@ private Token expect(String item, String alternative) {
       return t;
     }
 
+    private String[] parseParams() {
+      List<String> params = new LinkedList<String>();
+
+      Token t = peek();
+      if (t != null && t.text.equals("(")) {
+        expect("(");
+
+        // Checking for null in the for-loop condition prevents a null-pointer
+        // exception and allows us to fail more gracefully with a parsing error.
+        for (t = peek(); (t == null) || !t.text.equals(")"); t = expect(",", ")")) {
+          params.add(expect("name").text);
+        }
+        if (params.size() == 0) {
+          throw new IllegalArgumentException(
+              "type parameters expected for type string " + typeInfoString);
+        }
+      }
+
+      return params.toArray(new String[params.size()]);
+    }
+
     private TypeInfo parseType() {
       Token t = expect("type");
 
@@ -329,7 +394,11 @@ private TypeInfo parseType() {
           .getTypeEntryFromTypeName(t.text);
       if (primitiveType != null
           && !primitiveType.primitiveCategory.equals(PrimitiveCategory.UNKNOWN)) {
-        return TypeInfoFactory.getPrimitiveTypeInfo(primitiveType.typeName);
+        if (primitiveType.isParameterized()) {
+          primitiveType = primitiveType.addParameters(parseParams());
+        }
+        // If the type has qualifiers, the TypeInfo needs them in its type string
+        return TypeInfoFactory.getPrimitiveTypeInfo(primitiveType.toString());
       }
 
       // Is this a list type?
@@ -399,6 +468,26 @@ private TypeInfo parseType() {
           + t.position + " of '" + typeInfoString + "'");
     }
 
+    public PrimitiveParts parsePrimitiveParts() {
+      PrimitiveParts parts = new PrimitiveParts();
+      Token t = expect("type");
+      parts.typeName = t.text;
+      parts.typeParams = parseParams();
+      return parts;
+    }
+  }
+
+  public static class PrimitiveParts {
+    public String typeName;
+    public String[] typeParams;
+  }
+
+  /**
+   * Make some of the TypeInfo parsing available as a utility.
+   */
+  public static PrimitiveParts parsePrimitiveParts(String typeInfoString) {
+    TypeInfoParser parser = new TypeInfoParser(typeInfoString);
+    return parser.parsePrimitiveParts();
   }
 
   static Map<TypeInfo, ObjectInspector> cachedStandardObjectInspector =
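Given the grammar above (a type token, then an optional comma-separated parameter list in parentheses), parsePrimitiveParts should decompose a parameterized type string as in this sketch; the expected outputs shown in comments follow from the tokenizer rules, not from a test in the patch:

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.PrimitiveParts;

public class ParsePartsSketch {
  public static void main(String[] args) {
    PrimitiveParts p = TypeInfoUtils.parsePrimitiveParts("decimal(10,2)");
    System.out.println(p.typeName);          // "decimal"
    System.out.println(p.typeParams.length); // 2
    System.out.println(p.typeParams[0]);     // "10"
    System.out.println(p.typeParams[1]);     // "2"

    // Without parameters, parseParams() yields an empty array rather than null.
    PrimitiveParts q = TypeInfoUtils.parsePrimitiveParts("int");
    System.out.println(q.typeParams.length); // 0
  }
}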
@@ -414,9 +503,8 @@ public static ObjectInspector getStandardWritableObjectInspectorFromTypeInfo(
     if (result == null) {
       switch (typeInfo.getCategory()) {
       case PRIMITIVE: {
-        result = PrimitiveObjectInspectorFactory
-            .getPrimitiveWritableObjectInspector(((PrimitiveTypeInfo) typeInfo)
-                .getPrimitiveCategory());
+        result = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+            (PrimitiveTypeInfo) typeInfo);
         break;
       }
       case LIST: {
@@ -494,8 +582,7 @@ public static ObjectInspector getStandardJavaObjectInspectorFromTypeInfo(
       // NOTE: we use JavaPrimitiveObjectInspector instead of
       // StandardPrimitiveObjectInspector
       result = PrimitiveObjectInspectorFactory
-          .getPrimitiveJavaObjectInspector(PrimitiveObjectInspectorUtils
-              .getTypeEntryFromTypeName(typeInfo.getTypeName()).primitiveCategory);
+          .getPrimitiveJavaObjectInspector((PrimitiveTypeInfo) typeInfo);
       break;
     }
     case LIST: {
@@ -631,4 +718,22 @@ public static TypeInfo getTypeInfoFromTypeString(String typeString) {
     TypeInfoParser parser = new TypeInfoParser(typeString);
     return parser.parseTypeInfos().get(0);
   }
+
+  /**
+   * Given two types, determine whether conversion needs to occur to compare the two types.
+   * This is needed for cases like varchar, where the TypeInfo for varchar(10) != varchar(5),
+   * but there is no need to convert between them for comparison.
+   * @param typeA
+   * @param typeB
+   * @return true if conversion is required to compare the two types
+   */
+  public static boolean isConversionRequiredForComparison(TypeInfo typeA, TypeInfo typeB) {
+    if (typeA == typeB) {
+      return false;
+    }
+    if (TypeInfoUtils.doPrimitiveCategoriesMatch(typeA, typeB)) {
+      return false;
+    }
+    return true;
+  }
 }
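The comparison shortcut in action, under the same assumption that parameterized varchar types can be constructed: two varchars with different lengths share a primitive category, so no conversion step is needed, while comparing across categories still requires one.

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class ConversionCheckSketch {
  public static void main(String[] args) {
    // Assumption: varchar is registered as a parameterized primitive type.
    TypeInfo v10 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(10)");
    TypeInfo v5 = TypeInfoFactory.getPrimitiveTypeInfo("varchar(5)");
    TypeInfo i = TypeInfoFactory.getPrimitiveTypeInfo("int");

    System.out.println(TypeInfoUtils.isConversionRequiredForComparison(v10, v10)); // false - same instance
    System.out.println(TypeInfoUtils.isConversionRequiredForComparison(v10, v5));  // false - same category
    System.out.println(TypeInfoUtils.isConversionRequiredForComparison(v10, i));   // true  - categories differ
  }
}

Note that the shortcut is deliberately coarse: it consults only the primitive category, not the type parameters, which is exactly the varchar(10) vs varchar(5) case the javadoc describes.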