diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 7a44912..395fe31 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -3355,7 +3355,7 @@ public void alter_table_with_environment_context(final String dbname, ret = tbl.getSd().getCols(); } else { try { - Deserializer s = MetaStoreUtils.getDeserializer(hiveConf, tbl); + Deserializer s = MetaStoreUtils.getDeserializer(hiveConf, tbl, false); ret = MetaStoreUtils.getFieldsFromDeserializer(tableName, s); } catch (SerDeException e) { StringUtils.stringifyException(e); diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index 25c180d..bb8dcab 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -357,15 +357,21 @@ public static boolean updatePartitionStatsFast(PartitionSpecProxy.PartitionItera * */ static public Deserializer getDeserializer(Configuration conf, - org.apache.hadoop.hive.metastore.api.Table table) throws MetaException { + org.apache.hadoop.hive.metastore.api.Table table, boolean skipConfError) throws + MetaException { String lib = table.getSd().getSerdeInfo().getSerializationLib(); if (lib == null) { return null; } try { Deserializer deserializer = ReflectionUtils.newInstance(conf.getClassByName(lib). - asSubclass(Deserializer.class), conf); - SerDeUtils.initializeSerDe(deserializer, conf, MetaStoreUtils.getTableMetadata(table), null); + asSubclass(Deserializer.class), conf); + if (skipConfError) { + SerDeUtils.initializeSerDeWithoutErrorCheck(deserializer, conf, + MetaStoreUtils.getTableMetadata(table), null); + } else { + SerDeUtils.initializeSerDe(deserializer, conf, MetaStoreUtils.getTableMetadata(table), null); + } return deserializer; } catch (RuntimeException e) { throw e; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index d5374bc..64dfcac 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -153,8 +153,10 @@ import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveV1Authorizer; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.Deserializer; import org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe; +import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe; import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; @@ -183,6 +185,7 @@ import java.io.Writer; import java.net.URI; import java.net.URISyntaxException; +import java.sql.SQLException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -2802,7 +2805,7 @@ private int unlockTable(UnlockTableDesc unlockTbl) throws HiveException { * is the function we are describing * @throws HiveException */ - private int describeFunction(DescFunctionDesc descFunc) throws HiveException { + private int describeFunction(DescFunctionDesc descFunc) throws HiveException, SQLException { String funcName = descFunc.getName(); // write the results in the file @@ -3083,6 +3086,15 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException { List cols = null; List colStats = null; + + Deserializer deserializer = tbl.getDeserializer(true); + if (deserializer instanceof AbstractSerDe) { + String errorMsgs = ((AbstractSerDe) deserializer).getConfigurationErrors(); + if (errorMsgs != null && !errorMsgs.isEmpty()) { + throw new SQLException(errorMsgs); + } + } + if (colPath.equals(tableName)) { cols = (part == null || tbl.getTableType() == TableType.VIRTUAL_VIEW) ? tbl.getCols() : part.getCols(); @@ -3091,7 +3103,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException { cols.addAll(tbl.getPartCols()); } } else { - cols = Hive.getFieldsFromDeserializer(colPath, tbl.getDeserializer()); + cols = Hive.getFieldsFromDeserializer(colPath, deserializer); if (descTbl.isFormatted()) { // when column name is specified in describe table DDL, colPath will // will be table_name.column_name @@ -3121,6 +3133,8 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException { outStream.close(); outStream = null; + } catch (SQLException e) { + throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName); } catch (IOException e) { throw new HiveException(e, ErrorMsg.GENERIC_ERROR, tableName); } finally { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java index 8422782..b94f1e4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java @@ -239,7 +239,7 @@ public boolean isEmptyTable() { private StructObjectInspector getRowInspectorFromTable(TableDesc table) throws Exception { Deserializer serde = table.getDeserializerClass().newInstance(); - SerDeUtils.initializeSerDe(serde, job, table.getProperties(), null); + SerDeUtils.initializeSerDeWithoutErrorCheck(serde, job, table.getProperties(), null); return createRowInspector(getStructOIFrom(serde.getObjectInspector())); } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 2bbedd3..025d4be 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -192,7 +192,7 @@ public void checkValidity() throws HiveException { "at least one column must be specified for the table"); } if (!isView()) { - if (null == getDeserializerFromMetaStore()) { + if (null == getDeserializerFromMetaStore(false)) { throw new HiveException("must specify a non-null serDe"); } if (null == getInputFormatClass()) { @@ -253,14 +253,21 @@ final public Path getDataLocation() { final public Deserializer getDeserializer() { if (deserializer == null) { - deserializer = getDeserializerFromMetaStore(); + deserializer = getDeserializerFromMetaStore(false); } return deserializer; } - private Deserializer getDeserializerFromMetaStore() { + final public Deserializer getDeserializer(boolean skipConfError) { + if (deserializer == null) { + deserializer = getDeserializerFromMetaStore(skipConfError); + } + return deserializer; + } + + final public Deserializer getDeserializerFromMetaStore(boolean skipConfError) { try { - return MetaStoreUtils.getDeserializer(Hive.get().getConf(), tTable); + return MetaStoreUtils.getDeserializer(Hive.get().getConf(), tTable, skipConfError); } catch (MetaException e) { throw new RuntimeException(e); } catch (HiveException e) { diff --git ql/src/test/queries/clientpositive/avro_schema_error_message.q ql/src/test/queries/clientpositive/avro_schema_error_message.q deleted file mode 100644 index cf1fda1..0000000 --- ql/src/test/queries/clientpositive/avro_schema_error_message.q +++ /dev/null @@ -1,11 +0,0 @@ --- verify we get the sentinel schema if we don't provide one - -CREATE TABLE avro_with_no_schema -ROW FORMAT -SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' -STORED AS -INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' -OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'; - -DESCRIBE avro_with_no_schema; - diff --git ql/src/test/results/beelinepositive/avro_schema_error_message.q.out ql/src/test/results/beelinepositive/avro_schema_error_message.q.out deleted file mode 100644 index 0dfc75a..0000000 --- ql/src/test/results/beelinepositive/avro_schema_error_message.q.out +++ /dev/null @@ -1,24 +0,0 @@ -Saving all output to "!!{outputDirectory}!!/avro_schema_error_message.q.raw". Enter "record" with no arguments to stop it. ->>> !run !!{qFileDirectory}!!/avro_schema_error_message.q ->>> -- verify we get the sentinel schema if we don't provide one ->>> ->>> CREATE TABLE avro_with_no_schema -ROW FORMAT -SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' -STORED AS -INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' -OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'; -No rows affected ->>> ->>> DESCRIBE avro_with_no_schema; -'col_name','data_type','comment' -'error_error_error_error_error_error_error','string','from deserializer' -'cannot_determine_schema','string','from deserializer' -'check','string','from deserializer' -'schema','string','from deserializer' -'url','string','from deserializer' -'and','string','from deserializer' -'literal','string','from deserializer' -7 rows selected ->>> ->>> !record diff --git ql/src/test/results/clientpositive/avro_schema_error_message.q.out ql/src/test/results/clientpositive/avro_schema_error_message.q.out deleted file mode 100644 index 967a847..0000000 --- ql/src/test/results/clientpositive/avro_schema_error_message.q.out +++ /dev/null @@ -1,35 +0,0 @@ -PREHOOK: query: -- verify we get the sentinel schema if we don't provide one - -CREATE TABLE avro_with_no_schema -ROW FORMAT -SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' -STORED AS -INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' -OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@avro_with_no_schema -POSTHOOK: query: -- verify we get the sentinel schema if we don't provide one - -CREATE TABLE avro_with_no_schema -ROW FORMAT -SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' -STORED AS -INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' -OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default -POSTHOOK: Output: default@avro_with_no_schema -PREHOOK: query: DESCRIBE avro_with_no_schema -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@avro_with_no_schema -POSTHOOK: query: DESCRIBE avro_with_no_schema -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@avro_with_no_schema -error_error_error_error_error_error_error string from deserializer -cannot_determine_schema string from deserializer -check string from deserializer -schema string from deserializer -url string from deserializer -and string from deserializer -literal string from deserializer diff --git serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java index 28cfe07..3cb425d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java @@ -31,6 +31,8 @@ */ public abstract class AbstractSerDe implements SerDe { + protected String configErrors; + /** * Initialize the SerDe. By default, this will use one set of properties, either the * table properties or the partition properties. If a SerDe needs access to both sets, @@ -101,4 +103,17 @@ public abstract Writable serialize(Object obj, ObjectInspector objInspector) * structure of the Object returned from deserialize(...). */ public abstract ObjectInspector getObjectInspector() throws SerDeException; + + /** + * Get the error messages during the Serde configuration + * + * @return The error messages in the configuration which are empty if no error occurred + */ + public String getConfigurationErrors() { + if (configErrors == null || configErrors.isEmpty()) { + return ""; + } else { + return configErrors; + } + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java index 274d468..03ff835 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java @@ -506,7 +506,8 @@ public static Properties createOverlayedProperties(Properties tblProps, Properti /** * Initializes a SerDe. - * @param serde + * @param deserializer + * @param conf * @param tblProps * @param partProps * @throws SerDeException @@ -516,6 +517,28 @@ public static void initializeSerDe(Deserializer deserializer, Configuration conf throws SerDeException { if (deserializer instanceof AbstractSerDe) { ((AbstractSerDe) deserializer).initialize(conf, tblProps, partProps); + String msg = ((AbstractSerDe) deserializer).getConfigurationErrors(); + if (msg != null && !msg.isEmpty()) { + throw new SerDeException(msg); + } + } else { + deserializer.initialize(conf, createOverlayedProperties(tblProps, partProps)); + } + } + + /** + * Initializes a SerDe. + * @param deserializer + * @param conf + * @param tblProps + * @param partProps + * @throws SerDeException + */ + public static void initializeSerDeWithoutErrorCheck(Deserializer deserializer, + Configuration conf, Properties tblProps, + Properties partProps) throws SerDeException { + if (deserializer instanceof AbstractSerDe) { + ((AbstractSerDe) deserializer).initialize(conf, tblProps, partProps); } else { deserializer.initialize(conf, createOverlayedProperties(tblProps, partProps)); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java index b93121d..49aa83b 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java @@ -89,7 +89,7 @@ public void initialize(Configuration configuration, Properties properties) throw || properties.getProperty(AvroSerdeUtils.SCHEMA_URL) != null || columnNameProperty == null || columnNameProperty.isEmpty() || columnTypeProperty == null || columnTypeProperty.isEmpty()) { - schema = AvroSerdeUtils.determineSchemaOrReturnErrorSchema(properties); + schema = determineSchemaOrReturnErrorSchema(properties); } else { // Get column names and sort order columnNames = Arrays.asList(columnNameProperty.split(",")); @@ -135,6 +135,32 @@ public void initialize(Configuration configuration, Properties properties) throw this.oi = aoig.getObjectInspector(); } + /** + * Attempt to determine the schema via the usual means, but do not throw + * an exception if we fail. Instead, signal failure via a special + * schema. This is used because Hive calls init on the serde during + * any call, including calls to update the serde properties, meaning + * if the serde is in a bad state, there is no way to update that state. + */ + public Schema determineSchemaOrReturnErrorSchema(Properties props) { + try { + configErrors = ""; + return AvroSerdeUtils.determineSchemaOrThrowException(props); + } catch(AvroSerdeException he) { + LOG.warn("Encountered AvroSerdeException determining schema. Returning " + + "signal schema to indicate problem", he); + configErrors = new String("Encountered AvroSerdeException determining schema. Returning " + + "signal schema to indicate problem: " + he.getMessage()); + return schema = SchemaResolutionProblem.SIGNAL_BAD_SCHEMA; + } catch (Exception e) { + LOG.warn("Encountered exception determining schema. Returning signal " + + "schema to indicate problem", e); + configErrors = new String("Encountered exception determining schema. Returning signal " + + "schema to indicate problem: " + e.getMessage()); + return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA; + } + } + @Override public Class getSerializedClass() { return AvroGenericRecordWritable.class; diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java index 5da12cb..c0f054f 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java @@ -91,26 +91,6 @@ public static Schema determineSchemaOrThrowException(Properties properties) } } - /** - * Attempt to determine the schema via the usual means, but do not throw - * an exception if we fail. Instead, signal failure via a special - * schema. This is used because Hive calls init on the serde during - * any call, including calls to update the serde properties, meaning - * if the serde is in a bad state, there is no way to update that state. - */ - public static Schema determineSchemaOrReturnErrorSchema(Properties props) { - try { - return determineSchemaOrThrowException(props); - } catch(AvroSerdeException he) { - LOG.warn("Encountered AvroSerdeException determining schema. Returning " + - "signal schema to indicate problem", he); - return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA; - } catch (Exception e) { - LOG.warn("Encountered exception determining schema. Returning signal " + - "schema to indicate problem", e); - return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA; - } - } // Protected for testing and so we can pass in a conf for testing. protected static Schema getSchemaFromFS(String schemaFSUrl, Configuration conf) throws IOException, URISyntaxException { diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java index 803a987..36dc484 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java +++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java @@ -85,77 +85,59 @@ public void initializeDoesNotReuseSchemasFromConf() throws SerDeException { } @Test - public void noSchemaProvidedReturnsErrorSchema() throws SerDeException { + public void noSchemaProvidedThrowsException() { Properties props = new Properties(); - verifyErrorSchemaReturned(props); + verifyExpectedException(props); } @Test - public void gibberishSchemaProvidedReturnsErrorSchema() throws SerDeException { + public void gibberishSchemaProvidedReturnsErrorSchema() { Properties props = new Properties(); props.put(AvroSerdeUtils.SCHEMA_LITERAL, "blahblahblah"); - verifyErrorSchemaReturned(props); + verifyExpectedException(props); } @Test - public void emptySchemaProvidedReturnsErrorSchema() throws SerDeException { + public void emptySchemaProvidedThrowsException() { Properties props = new Properties(); props.put(AvroSerdeUtils.SCHEMA_LITERAL, ""); - verifyErrorSchemaReturned(props); + verifyExpectedException(props); } @Test - public void badSchemaURLProvidedReturnsErrorSchema() throws SerDeException { + public void badSchemaURLProvidedThrowsException() { Properties props = new Properties(); props.put(AvroSerdeUtils.SCHEMA_URL, "not://a/url"); - verifyErrorSchemaReturned(props); + verifyExpectedException(props); } @Test - public void emptySchemaURLProvidedReturnsErrorSchema() throws SerDeException { + public void emptySchemaURLProvidedThrowsException() { Properties props = new Properties(); props.put(AvroSerdeUtils.SCHEMA_URL, ""); - verifyErrorSchemaReturned(props); + verifyExpectedException(props); } @Test - public void bothPropertiesSetToNoneReturnsErrorSchema() throws SerDeException { + public void bothPropertiesSetToNoneThrowsException() { Properties props = new Properties(); props.put(AvroSerdeUtils.SCHEMA_URL, AvroSerdeUtils.SCHEMA_NONE); props.put(AvroSerdeUtils.SCHEMA_LITERAL, AvroSerdeUtils.SCHEMA_NONE); - verifyErrorSchemaReturned(props); + verifyExpectedException(props); } - private void verifyErrorSchemaReturned(Properties props) throws SerDeException { + private void verifyExpectedException(Properties props) { AvroSerDe asd = new AvroSerDe(); - SerDeUtils.initializeSerDe(asd, new Configuration(), props, null); - assertTrue(asd.getObjectInspector() instanceof StandardStructObjectInspector); - StandardStructObjectInspector oi = (StandardStructObjectInspector)asd.getObjectInspector(); - List allStructFieldRefs = oi.getAllStructFieldRefs(); - assertEquals(SchemaResolutionProblem.SIGNAL_BAD_SCHEMA.getFields().size(), allStructFieldRefs.size()); - StructField firstField = allStructFieldRefs.get(0); - assertTrue(firstField.toString().contains("error_error_error_error_error_error_error")); - - try { - Writable mock = Mockito.mock(Writable.class); - asd.deserialize(mock); - fail("Should have thrown a BadSchemaException"); - } catch (BadSchemaException bse) { - // good - } - try { - Object o = Mockito.mock(Object.class); - ObjectInspector mockOI = Mockito.mock(ObjectInspector.class); - asd.serialize(o, mockOI); - fail("Should have thrown a BadSchemaException"); - } catch (BadSchemaException bse) { + SerDeUtils.initializeSerDe(asd, new Configuration(), props, null); + fail("Expected Exception did not be thrown"); + } catch (SerDeException e) { // good } }