diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index 020943f..dd17f62 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -3054,7 +3054,7 @@ private int describeTable(Hive db, DescTableDesc descTbl) throws HiveException { List colStats = null; if (colPath.equals(tableName)) { cols = (part == null || tbl.getTableType() == TableType.VIRTUAL_VIEW) ? - tbl.getCols() : part.getCols(); + tbl.getCols(true) : part.getCols(); if (!descTbl.isFormatted()) { cols.addAll(tbl.getPartCols()); diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java index 4acafba..ab8e93f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java @@ -258,7 +258,7 @@ final public Deserializer getDeserializer() { return deserializer; } - private Deserializer getDeserializerFromMetaStore() { + final public Deserializer getDeserializerFromMetaStore() { try { return MetaStoreUtils.getDeserializer(Hive.get().getConf(), tTable); } catch (MetaException e) { @@ -611,6 +611,27 @@ private boolean isField(String col) { return new ArrayList(); } + /** + * Returns the table's columns: the storage descriptor's columns when the + * schema is metastore-based, otherwise the fields reported by the deserializer. + * Logs the error and returns an empty list if a HiveException is thrown. + * + * @param skipConfErr NOTE(review): not read anywhere in this method - confirm intended use + */ + public List getCols(boolean skipConfErr) { + String serializationLib = getSerializationLib(); + try { + if (hasMetastoreBasedSchema(Hive.get().getConf(), serializationLib)) { + return tTable.getSd().getCols(); + } else { + return Hive.getFieldsFromDeserializer(getTableName(), getDeserializer()); + } + } catch (HiveException e) { + LOG.error("Unable to get field from serde: " + serializationLib, e); + } + return new ArrayList(); + } + /** * Returns a list of all the columns of the table (data columns + partition * columns in that order. 
diff --git ql/src/test/queries/clientpositive/avro_schema_error_message.q ql/src/test/queries/clientpositive/avro_schema_error_message.q deleted file mode 100644 index cf1fda1..0000000 --- ql/src/test/queries/clientpositive/avro_schema_error_message.q +++ /dev/null @@ -1,11 +0,0 @@ --- verify we get the sentinel schema if we don't provide one - -CREATE TABLE avro_with_no_schema -ROW FORMAT -SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' -STORED AS -INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' -OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'; - -DESCRIBE avro_with_no_schema; - diff --git ql/src/test/results/beelinepositive/avro_schema_error_message.q.out ql/src/test/results/beelinepositive/avro_schema_error_message.q.out deleted file mode 100644 index 0dfc75a..0000000 --- ql/src/test/results/beelinepositive/avro_schema_error_message.q.out +++ /dev/null @@ -1,24 +0,0 @@ -Saving all output to "!!{outputDirectory}!!/avro_schema_error_message.q.raw". Enter "record" with no arguments to stop it. 
->>> !run !!{qFileDirectory}!!/avro_schema_error_message.q ->>> -- verify we get the sentinel schema if we don't provide one ->>> ->>> CREATE TABLE avro_with_no_schema -ROW FORMAT -SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' -STORED AS -INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' -OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'; -No rows affected ->>> ->>> DESCRIBE avro_with_no_schema; -'col_name','data_type','comment' -'error_error_error_error_error_error_error','string','from deserializer' -'cannot_determine_schema','string','from deserializer' -'check','string','from deserializer' -'schema','string','from deserializer' -'url','string','from deserializer' -'and','string','from deserializer' -'literal','string','from deserializer' -7 rows selected ->>> ->>> !record diff --git ql/src/test/results/clientpositive/avro_schema_error_message.q.out ql/src/test/results/clientpositive/avro_schema_error_message.q.out deleted file mode 100644 index 967a847..0000000 --- ql/src/test/results/clientpositive/avro_schema_error_message.q.out +++ /dev/null @@ -1,35 +0,0 @@ -PREHOOK: query: -- verify we get the sentinel schema if we don't provide one - -CREATE TABLE avro_with_no_schema -ROW FORMAT -SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' -STORED AS -INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' -OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' -PREHOOK: type: CREATETABLE -PREHOOK: Output: database:default -PREHOOK: Output: default@avro_with_no_schema -POSTHOOK: query: -- verify we get the sentinel schema if we don't provide one - -CREATE TABLE avro_with_no_schema -ROW FORMAT -SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' -STORED AS -INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' -OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' -POSTHOOK: type: CREATETABLE -POSTHOOK: Output: database:default 
-POSTHOOK: Output: default@avro_with_no_schema -PREHOOK: query: DESCRIBE avro_with_no_schema -PREHOOK: type: DESCTABLE -PREHOOK: Input: default@avro_with_no_schema -POSTHOOK: query: DESCRIBE avro_with_no_schema -POSTHOOK: type: DESCTABLE -POSTHOOK: Input: default@avro_with_no_schema -error_error_error_error_error_error_error string from deserializer -cannot_determine_schema string from deserializer -check string from deserializer -schema string from deserializer -url string from deserializer -and string from deserializer -literal string from deserializer diff --git serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java index 28cfe07..3cb425d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/AbstractSerDe.java @@ -31,6 +31,8 @@ */ public abstract class AbstractSerDe implements SerDe { + protected String configErrors; + /** * Initialize the SerDe. By default, this will use one set of properties, either the * table properties or the partition properties. If a SerDe needs access to both sets, @@ -101,4 +103,17 @@ public abstract Writable serialize(Object obj, ObjectInspector objInspector) * structure of the Object returned from deserialize(...). 
*/ public abstract ObjectInspector getObjectInspector() throws SerDeException; + + /** + * Returns the error messages recorded while configuring the SerDe. + * + * @return the configuration error messages, or an empty string if no error occurred + */ + public String getConfigurationErrors() { + if (configErrors == null || configErrors.isEmpty()) { + return ""; + } else { + return configErrors; + } + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java index 274d468..d09f984 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java @@ -506,7 +506,8 @@ public static Properties createOverlayedProperties(Properties tblProps, Properti /** * Initializes a SerDe. - * @param serde + * @param deserializer + * @param conf * @param tblProps * @param partProps * @throws SerDeException @@ -515,7 +516,12 @@ public static void initializeSerDe(Deserializer deserializer, Configuration conf Properties tblProps, Properties partProps) throws SerDeException { if (deserializer instanceof AbstractSerDe) { + AbstractSerDe serDe = (AbstractSerDe) deserializer; - ((AbstractSerDe) deserializer).initialize(conf, tblProps, partProps); + serDe.initialize(conf, tblProps, partProps); + String msg = serDe.getConfigurationErrors(); + if (!msg.isEmpty()) { + throw new SerDeException(msg); + } } else { deserializer.initialize(conf, createOverlayedProperties(tblProps, partProps)); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java index 69545b0..45e885f 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java @@ -83,7 +83,7 @@ public void initialize(Configuration configuration, Properties properties) throw || properties.getProperty(AvroSerdeUtils.SCHEMA_URL) != null || columnNameProperty == null || 
columnNameProperty.isEmpty() || columnTypeProperty == null || columnTypeProperty.isEmpty()) { - schema = AvroSerdeUtils.determineSchemaOrReturnErrorSchema(properties); + schema = determineSchemaOrReturnErrorSchema(properties); } else { // Get column names and sort order columnNames = Arrays.asList(columnNameProperty.split(",")); @@ -129,6 +129,32 @@ public void initialize(Configuration configuration, Properties properties) throw this.oi = aoig.getObjectInspector(); } + /** + * Attempt to determine the schema via the usual means, but do not throw + * an exception if we fail. Instead, record the failure in configErrors and + * return a special signal schema. This is used because Hive calls init on + * the serde during any call, including calls to update the serde properties, + * meaning if the serde is in a bad state, there is no way to update that state. + */ + public Schema determineSchemaOrReturnErrorSchema(Properties props) { + try { + configErrors = ""; + return AvroSerdeUtils.determineSchemaOrThrowException(props); + } catch(AvroSerdeException he) { + LOG.warn("Encountered AvroSerdeException determining schema. Returning " + + "signal schema to indicate problem", he); + configErrors = "Encountered AvroSerdeException determining schema. Returning " + + "signal schema to indicate problem: " + he.getMessage(); + return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA; + } catch (Exception e) { + LOG.warn("Encountered exception determining schema. Returning signal " + + "schema to indicate problem", e);
+ configErrors = "Encountered exception determining schema. Returning signal " + + "schema to indicate problem: " + e.getMessage(); + return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA; + } + } + @Override public Class getSerializedClass() { return AvroGenericRecordWritable.class; diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java index 5da12cb..c0f054f 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java @@ -91,26 +91,6 @@ public static Schema determineSchemaOrThrowException(Properties properties) } } - /** - * Attempt to determine the schema via the usual means, but do not throw - * an exception if we fail. Instead, signal failure via a special - * schema. This is used because Hive calls init on the serde during - * any call, including calls to update the serde properties, meaning - * if the serde is in a bad state, there is no way to update that state. - */ - public static Schema determineSchemaOrReturnErrorSchema(Properties props) { - try { - return determineSchemaOrThrowException(props); - } catch(AvroSerdeException he) { - LOG.warn("Encountered AvroSerdeException determining schema. Returning " + - "signal schema to indicate problem", he); - return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA; - } catch (Exception e) { - LOG.warn("Encountered exception determining schema. Returning signal " + - "schema to indicate problem", e); - return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA; - } - } // Protected for testing and so we can pass in a conf for testing. 
protected static Schema getSchemaFromFS(String schemaFSUrl, Configuration conf) throws IOException, URISyntaxException { diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java index 803a987..36dc484 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java +++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java @@ -85,77 +85,59 @@ public void initializeDoesNotReuseSchemasFromConf() throws SerDeException { } @Test - public void noSchemaProvidedReturnsErrorSchema() throws SerDeException { + public void noSchemaProvidedThrowsException() { Properties props = new Properties(); - verifyErrorSchemaReturned(props); + verifyExpectedException(props); } @Test - public void gibberishSchemaProvidedReturnsErrorSchema() throws SerDeException { + public void gibberishSchemaProvidedThrowsException() { Properties props = new Properties(); props.put(AvroSerdeUtils.SCHEMA_LITERAL, "blahblahblah"); - verifyErrorSchemaReturned(props); + verifyExpectedException(props); } @Test - public void emptySchemaProvidedReturnsErrorSchema() throws SerDeException { + public void emptySchemaProvidedThrowsException() { Properties props = new Properties(); props.put(AvroSerdeUtils.SCHEMA_LITERAL, ""); - verifyErrorSchemaReturned(props); + verifyExpectedException(props); } @Test - public void badSchemaURLProvidedReturnsErrorSchema() throws SerDeException { + public void badSchemaURLProvidedThrowsException() { Properties props = new Properties(); props.put(AvroSerdeUtils.SCHEMA_URL, "not://a/url"); - verifyErrorSchemaReturned(props); + verifyExpectedException(props); } @Test - public void emptySchemaURLProvidedReturnsErrorSchema() throws SerDeException { + public void emptySchemaURLProvidedThrowsException() { Properties props = new Properties(); props.put(AvroSerdeUtils.SCHEMA_URL, ""); - verifyErrorSchemaReturned(props); + verifyExpectedException(props); } @Test - 
public void bothPropertiesSetToNoneReturnsErrorSchema() throws SerDeException { + public void bothPropertiesSetToNoneThrowsException() { Properties props = new Properties(); props.put(AvroSerdeUtils.SCHEMA_URL, AvroSerdeUtils.SCHEMA_NONE); props.put(AvroSerdeUtils.SCHEMA_LITERAL, AvroSerdeUtils.SCHEMA_NONE); - verifyErrorSchemaReturned(props); + verifyExpectedException(props); } - private void verifyErrorSchemaReturned(Properties props) throws SerDeException { + private void verifyExpectedException(Properties props) { AvroSerDe asd = new AvroSerDe(); - SerDeUtils.initializeSerDe(asd, new Configuration(), props, null); - assertTrue(asd.getObjectInspector() instanceof StandardStructObjectInspector); - StandardStructObjectInspector oi = (StandardStructObjectInspector)asd.getObjectInspector(); - List allStructFieldRefs = oi.getAllStructFieldRefs(); - assertEquals(SchemaResolutionProblem.SIGNAL_BAD_SCHEMA.getFields().size(), allStructFieldRefs.size()); - StructField firstField = allStructFieldRefs.get(0); - assertTrue(firstField.toString().contains("error_error_error_error_error_error_error")); - - try { - Writable mock = Mockito.mock(Writable.class); - asd.deserialize(mock); - fail("Should have thrown a BadSchemaException"); - } catch (BadSchemaException bse) { - // good - } - try { - Object o = Mockito.mock(Object.class); - ObjectInspector mockOI = Mockito.mock(ObjectInspector.class); - asd.serialize(o, mockOI); - fail("Should have thrown a BadSchemaException"); - } catch (BadSchemaException bse) { + SerDeUtils.initializeSerDe(asd, new Configuration(), props, null); + fail("Expected SerDeException was not thrown"); + } catch (SerDeException e) { // good } }