diff --git a/itests/custom-serde/src/main/java/org/apache/hadoop/hive/serde2/CustomSerDeWithFaultyInspector.java b/itests/custom-serde/src/main/java/org/apache/hadoop/hive/serde2/CustomSerDeWithFaultyInspector.java
new file mode 100644
index 0000000..6a8e19f
--- /dev/null
+++ b/itests/custom-serde/src/main/java/org/apache/hadoop/hive/serde2/CustomSerDeWithFaultyInspector.java
@@ -0,0 +1,149 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import au.com.bytecode.opencsv.CSVWriter;
+
+/**
+ * Test SerDe whose object inspector reports every column as a string, whatever column types
+ * the table declares. {@code deserialize} always returns a row of nulls and
+ * {@code serialize} always returns null.
+ */
+@SerDeSpec(schemaProps = {serdeConstants.LIST_COLUMNS, serdeConstants.LIST_COLUMN_TYPES})
+public class CustomSerDeWithFaultyInspector extends AbstractSerDe {
+
+  private ObjectInspector inspector;
+  private int numCols;
+  private List<String> row;
+
+  public static final String SEPARATORCHAR = "separatorChar";
+  public static final String QUOTECHAR = "quoteChar";
+  public static final String ESCAPECHAR = "escapeChar";
+
+  /**
+   * Builds a struct inspector with one string field per declared column, plus a reusable row
+   * holding one null per column.
+   *
+   * @param conf configuration; not used by this SerDe
+   * @param tbl table properties; must contain {@code serdeConstants.LIST_COLUMNS}
+   * @throws SerDeException if the column list property is missing
+   */
+  @Override
+  public void initialize(final Configuration conf, final Properties tbl) throws SerDeException {
+
+    final String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
+    if (columnNameProperty == null) {
+      throw new SerDeException("Table property " + serdeConstants.LIST_COLUMNS + " is not set");
+    }
+
+    final List<String> columnNames = Arrays.asList(columnNameProperty.split(","));
+
+    numCols = columnNames.size();
+
+    final List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(numCols);
+
+    for (int i = 0; i < numCols; i++) {
+      // Blindly set this as a string OI.  We should throw an error while verifying the type info
+      // for this column if it is not a string.
+      columnOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+    }
+
+    inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
+    row = new ArrayList<String>(numCols);
+
+    for (int i = 0; i < numCols; i++) {
+      row.add(null);
+    }
+
+    // The parsed characters are not stored; deserialize() and serialize() never use them.
+    getProperty(tbl, SEPARATORCHAR, CSVWriter.DEFAULT_SEPARATOR);
+    getProperty(tbl, QUOTECHAR, CSVWriter.DEFAULT_QUOTE_CHARACTER);
+    getProperty(tbl, ESCAPECHAR, CSVWriter.DEFAULT_ESCAPE_CHARACTER);
+  }
+
+  /**
+   * Reads a single-character table property.
+   *
+   * @param tbl table properties to read from
+   * @param property name of the property
+   * @param def value returned when the property is missing or empty
+   * @return the first character of the property value, or {@code def}
+   */
+  private char getProperty(final Properties tbl, final String property, final char def) {
+    final String val = tbl.getProperty(property);
+
+    // An empty value has no first character; fall back to the default instead of throwing.
+    if (val != null && !val.isEmpty()) {
+      return val.charAt(0);
+    }
+
+    return def;
+  }
+
+  /** Returns the struct inspector built by {@code initialize}. */
+  @Override
+  public ObjectInspector getObjectInspector() throws SerDeException {
+    return inspector;
+  }
+
+  /** Returns {@code Text.class}. */
+  @Override
+  public Class<? extends Writable> getSerializedClass() {
+    return Text.class;
+  }
+
+  /**
+   * Ignores the input and returns the row created by {@code initialize}.
+   *
+   * @param blob serialized record; not read
+   * @return the list holding one null per column
+   */
+  @Override
+  public Object deserialize(Writable blob) throws SerDeException {
+    // Now all the column values should always return NULL!
+    return row;
+  }
+
+  /** Always returns {@code null}; this SerDe does not serialize. */
+  @Override
+  public Writable serialize(Object obj, ObjectInspector objInspector)
+      throws SerDeException {
+    return null;
+  }
+
+  /** Always returns {@code null}; statistics are not supported. */
+  @Override
+  public SerDeStats getSerDeStats() {
+    // no support for statistics
+    return null;
+  }
+
+}
diff --git a/ql/src/test/queries/clientnegative/create_table_faulty_oi_serde.q b/ql/src/test/queries/clientnegative/create_table_faulty_oi_serde.q
new file mode 100644
index 0000000..6a9f0e4
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/create_table_faulty_oi_serde.q
@@ -0,0 +1,5 @@
+ADD JAR ${system:maven.local.repository}/org/apache/hive/hive-it-custom-serde/${system:hive.version}/hive-it-custom-serde-${system:hive.version}.jar;
+CREATE EXTERNAL TABLE test (totalprice DECIMAL(38,10))
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.CustomSerDeWithFaultyInspector' with
+serdeproperties ("separatorChar" = ",","quoteChar"= "'","escapeChar"= "\\")
+STORED AS TEXTFILE tblproperties ("skip.header.line.count"="1");
diff --git a/ql/src/test/results/clientnegative/create_table_faulty_oi_serde.q.out b/ql/src/test/results/clientnegative/create_table_faulty_oi_serde.q.out
new file mode 100644
index 0000000..aa679e6
--- /dev/null
+++ b/ql/src/test/results/clientnegative/create_table_faulty_oi_serde.q.out
@@ -0,0 +1,8 @@
+PREHOOK: query: CREATE EXTERNAL TABLE test (totalprice DECIMAL(38,10))
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.CustomSerDeWithFaultyInspector' with
+serdeproperties ("separatorChar" = ",","quoteChar"= "'","escapeChar"= "\\")
+STORED AS TEXTFILE tblproperties ("skip.header.line.count"="1")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test
+FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask.
java.lang.RuntimeException: MetaException(message:org.apache.hadoop.hive.serde2.SerDeException Deserializer: org.apache.hadoop.hive.serde2.CustomSerDeWithFaultyInspector actual column type: string is not the same as expected column type: decimal(38,10))
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
index 6e08dfd..d296319 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
@@ -24,6 +24,7 @@
 import java.util.Properties;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
@@ -49,6 +50,9 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
 import org.slf4j.Logger;
@@ -536,6 +540,62 @@ public static void initializeSerDe(Deserializer deserializer, Configuration conf
     } else {
       deserializer.initialize(conf, createOverlayedProperties(tblProps, partProps));
     }
+    verifyDataTypes(deserializer, tblProps);
+  }
+
+  /**
+   * Verifies that the column types exposed by the deserializer's object inspector match the
+   * column types declared under {@code serdeConstants.LIST_COLUMN_TYPES} in the properties.
+   *
+   * Nothing is checked when the properties declare no column types, or when the struct type
+   * derived from the inspector has no field types.
+   *
+   * @param deserializer deserializer whose object inspector is checked
+   * @param props properties that carry the expected column types
+   * @throws SerDeException if the object inspector is null or not a struct inspector, if the
+   *           number of columns differs, or if any column type differs
+   */
+  private static void verifyDataTypes(Deserializer deserializer, Properties props)
+      throws SerDeException {
+    String columnTypeProperty = props.getProperty(serdeConstants.LIST_COLUMN_TYPES);
+    List<TypeInfo> expectedColumnTypes = columnTypeProperty == null ? null :
+      TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+
+    if (expectedColumnTypes == null || expectedColumnTypes.isEmpty()) {
+      // We leave the schema verification to the serde itself, e.g. AvroSerDe or
+      // LazySimpleSerDe. The current AbstractSerDe interface doesn't provide an API
+      // to retrieve the expected column types for the SerDe.
+      return;
+    }
+
+    ObjectInspector oi = deserializer.getObjectInspector();
+
+    // instanceof is false for null, so this single test also rejects a missing inspector.
+    if (!(oi instanceof StructObjectInspector)) {
+      throw new SerDeException("Deserializer: " + deserializer.getClass().getName()
+        + " has null or non-struct object inspector");
+    }
+
+    TypeInfo ti = TypeInfoUtils.getTypeInfoFromObjectInspector(oi);
+    StructTypeInfo sti = (StructTypeInfo) ti;
+    List<TypeInfo> actualColumnTypes = sti.getAllStructFieldTypeInfos();
+
+    if (actualColumnTypes == null || actualColumnTypes.isEmpty()) {
+      return;
+    }
+
+    if (expectedColumnTypes.size() != actualColumnTypes.size()) {
+      throw new SerDeException("Deserializer: " + deserializer.getClass().getName()
+        + " Expected column types size: " + expectedColumnTypes.size()
+        + " is not the same as actual column type size: " + actualColumnTypes.size());
+    }
+    for (int i = 0; i < expectedColumnTypes.size(); i++) {
+      if (!actualColumnTypes.get(i).equals(expectedColumnTypes.get(i))) {
+        throw new SerDeException("Deserializer: " + deserializer.getClass().getName()
+          + " actual column type: " + actualColumnTypes.get(i)
+          + " is not the same as expected column type: " + expectedColumnTypes.get(i));
+      }
+    }
   }
 
   /**