diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index be4f84d..42d8050 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -310,6 +310,23 @@ minimr.query.negative.files=cluster_tasklog_retrieval.q,\
   udf_local_resource.q
 spark.query.files=spark_test.q \
+  avro_add_column.q \
+  avro_add_column2.q \
+  avro_add_column3.q \
+  avro_change_schema.q \
+  avro_compression_enabled.q \
+  avro_compression_enabled_native.q \
+  avro_decimal.q \
+  avro_decimal_native.q \
+  avro_evolved_schemas.q \
+  avro_native.q \
+  avro_nullable_fields.q \
+  avro_partitioned.q \
+  avro_partitioned_native.q \
+  avro_sanity_test.q \
+  avro_schema_error_message.q \
+  avro_schema_evolution_native.q \
+  avro_schema_literal.q \
   timestamp_1.q \
   timestamp_2.q \
   timestamp_3.q \
diff --git a/ql/src/test/results/clientpositive/spark/avro_add_column.q.out b/ql/src/test/results/clientpositive/spark/avro_add_column.q.out
new file mode 100644
index 0000000..3aae562
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/avro_add_column.q.out
@@ -0,0 +1,72 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+-- verify that we can actually read avro files
+CREATE TABLE doctors (
+  number int,
+  first_name string)
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@doctors
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+-- verify that we can actually read avro files
+CREATE TABLE doctors (
+  number int,
+  first_name string)
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@doctors
+PREHOOK: query: DESCRIBE doctors
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@doctors
+POSTHOOK: query: DESCRIBE doctors
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@doctors
+number int from deserializer
+first_name string from deserializer
+PREHOOK: query: ALTER TABLE doctors ADD COLUMNS (last_name string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@doctors
+PREHOOK: Output: default@doctors
+POSTHOOK: query: ALTER TABLE doctors ADD COLUMNS (last_name string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@doctors
+POSTHOOK: Output: default@doctors
+PREHOOK: query: DESCRIBE doctors
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@doctors
+POSTHOOK: query: DESCRIBE doctors
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@doctors
+number int from deserializer
+first_name string from deserializer
+last_name string from deserializer
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@doctors
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@doctors
+PREHOOK: query: SELECT * FROM doctors
+PREHOOK: type: QUERY
+PREHOOK: Input: default@doctors
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM doctors
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@doctors
+#### A masked pattern was here ####
+1 William Hartnell
+10 David Tennant
+11 Matt Smith
+2 Patrick Troughton
+3 Jon Pertwee
+4 Tom Baker
+5 Peter Davison
+6 Colin Baker
+7 Sylvester McCoy
+8 Paul McGann
+9 Christopher Eccleston
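The seventeen avro_*.q entries added to spark.query.files above reuse the existing qfiles; only the Spark golden outputs in this patch are new. For readers skimming the outputs: avro_add_column exercises the fact that an Avro-backed table's schema lives in the metastore, so ALTER TABLE ADD COLUMNS is metadata-only. A minimal HiveQL sketch of the flow (same names as the test; the comment is mine):

-- Adding a column never rewrites data files. Files whose writer schema
-- already contains the field (doctors.avro does) expose it immediately;
-- files written before the ALTER read the new column as NULL instead
-- (that case is covered by avro_add_column2/3 below).
CREATE TABLE doctors (number int, first_name string) STORED AS AVRO;
ALTER TABLE doctors ADD COLUMNS (last_name string);
LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors;
SELECT * FROM doctors;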
diff --git a/ql/src/test/results/clientpositive/spark/avro_add_column2.q.out b/ql/src/test/results/clientpositive/spark/avro_add_column2.q.out
new file mode 100644
index 0000000..669a0e0
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/avro_add_column2.q.out
@@ -0,0 +1,110 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+-- verify that we can actually read avro files
+CREATE TABLE doctors (
+  number int,
+  first_name string,
+  last_name string)
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@doctors
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+-- verify that we can actually read avro files
+CREATE TABLE doctors (
+  number int,
+  first_name string,
+  last_name string)
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@doctors
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@doctors
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@doctors
+PREHOOK: query: CREATE TABLE doctors_copy (
+  number int,
+  first_name string)
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@doctors_copy
+POSTHOOK: query: CREATE TABLE doctors_copy (
+  number int,
+  first_name string)
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@doctors_copy
+PREHOOK: query: INSERT INTO TABLE doctors_copy SELECT number, first_name FROM doctors
+PREHOOK: type: QUERY
+PREHOOK: Input: default@doctors
+PREHOOK: Output: default@doctors_copy
+[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
+POSTHOOK: query: INSERT INTO TABLE doctors_copy SELECT number, first_name FROM doctors
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@doctors
+POSTHOOK: Output: default@doctors_copy
+POSTHOOK: Lineage: doctors_copy.first_name SIMPLE [(doctors)doctors.FieldSchema(name:first_name, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: doctors_copy.number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ]
+PREHOOK: query: ALTER TABLE doctors_copy ADD COLUMNS (last_name string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@doctors_copy
+PREHOOK: Output: default@doctors_copy
+POSTHOOK: query: ALTER TABLE doctors_copy ADD COLUMNS (last_name string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@doctors_copy
+POSTHOOK: Output: default@doctors_copy
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors_copy
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@doctors_copy
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors_copy
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@doctors_copy
+PREHOOK: query: DESCRIBE doctors_copy
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@doctors_copy
+POSTHOOK: query: DESCRIBE doctors_copy
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@doctors_copy
+number int from deserializer
+first_name string from deserializer
+last_name string from deserializer
+PREHOOK: query: SELECT * FROM doctors_copy
+PREHOOK: type: QUERY
+PREHOOK: Input: default@doctors_copy
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM doctors_copy
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@doctors_copy
+#### A masked pattern was here ####
+1 William Hartnell
+1 William NULL
+10 David NULL
+10 David Tennant
+11 Matt NULL
+11 Matt Smith
+2 Patrick NULL
+2 Patrick Troughton
+3 Jon NULL
+3 Jon Pertwee
+4 Tom Baker
+4 Tom NULL
+5 Peter Davison
+5 Peter NULL
+6 Colin Baker
+6 Colin NULL
+7 Sylvester McCoy
+7 Sylvester NULL
+8 Paul McGann
+8 Paul NULL
+9 Christopher Eccleston
+9 Christopher NULL
diff --git a/ql/src/test/results/clientpositive/spark/avro_add_column3.q.out b/ql/src/test/results/clientpositive/spark/avro_add_column3.q.out
new file mode 100644
index 0000000..02f2325
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/avro_add_column3.q.out
@@ -0,0 +1,101 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+-- verify that we can actually read avro files
+CREATE TABLE doctors (
+  number int,
+  first_name string,
+  last_name string)
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@doctors
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+-- verify that we can actually read avro files
+CREATE TABLE doctors (
+  number int,
+  first_name string,
+  last_name string)
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@doctors
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@doctors
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@doctors
+PREHOOK: query: CREATE TABLE doctors_copy (
+  number int,
+  first_name string)
+PARTITIONED BY (part int)
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@doctors_copy
+POSTHOOK: query: CREATE TABLE doctors_copy (
+  number int,
+  first_name string)
+PARTITIONED BY (part int)
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@doctors_copy
+PREHOOK: query: INSERT INTO TABLE doctors_copy PARTITION(part=1) SELECT number, first_name FROM doctors
+PREHOOK: type: QUERY
+PREHOOK: Input: default@doctors
+PREHOOK: Output: default@doctors_copy@part=1
+[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
+POSTHOOK: query: INSERT INTO TABLE doctors_copy PARTITION(part=1) SELECT number, first_name FROM doctors
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@doctors
+POSTHOOK: Output: default@doctors_copy@part=1
+POSTHOOK: Lineage: doctors_copy PARTITION(part=1).first_name SIMPLE [(doctors)doctors.FieldSchema(name:first_name, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: doctors_copy PARTITION(part=1).number SIMPLE [(doctors)doctors.FieldSchema(name:number, type:int, comment:from deserializer), ]
+PREHOOK: query: ALTER TABLE doctors_copy ADD COLUMNS (last_name string)
+PREHOOK: type: ALTERTABLE_ADDCOLS
+PREHOOK: Input: default@doctors_copy
+PREHOOK: Output: default@doctors_copy
+POSTHOOK: query: ALTER TABLE doctors_copy ADD COLUMNS (last_name string)
+POSTHOOK: type: ALTERTABLE_ADDCOLS
+POSTHOOK: Input: default@doctors_copy
+POSTHOOK: Output: default@doctors_copy
+PREHOOK: query: DESCRIBE doctors_copy
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@doctors_copy
+POSTHOOK: query: DESCRIBE doctors_copy
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@doctors_copy
+number int from deserializer
+first_name string from deserializer
+last_name string from deserializer
+part int
+
+# Partition Information
+# col_name data_type comment
+
+part int
+PREHOOK: query: SELECT * FROM doctors_copy
+PREHOOK: type: QUERY
+PREHOOK: Input: default@doctors_copy
+PREHOOK: Input: default@doctors_copy@part=1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM doctors_copy
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@doctors_copy
+POSTHOOK: Input: default@doctors_copy@part=1
+#### A masked pattern was here ####
+1 William NULL 1
+10 David NULL 1
+11 Matt NULL 1
+2 Patrick NULL 1
+3 Jon NULL 1
+4 Tom NULL 1
+5 Peter NULL 1
+6 Colin NULL 1
+7 Sylvester NULL 1
+8 Paul NULL 1
+9 Christopher NULL 1
diff --git a/ql/src/test/results/clientpositive/spark/avro_change_schema.q.out b/ql/src/test/results/clientpositive/spark/avro_change_schema.q.out
new file mode 100644
index 0000000..5d52d21
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/avro_change_schema.q.out
@@ -0,0 +1,73 @@
+PREHOOK: query: -- verify that we can update the table properties
+CREATE TABLE avro2
+ROW FORMAT
+SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS
+INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES ('avro.schema.literal'='{ "namespace": "org.apache.hive",
+  "name": "first_schema",
+  "type": "record",
+  "fields": [
+    { "name":"string1", "type":"string" },
+    { "name":"string2", "type":"string" }
+  ] }')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro2
+POSTHOOK: query: -- verify that we can update the table properties
+CREATE TABLE avro2
+ROW FORMAT
+SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS
+INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES ('avro.schema.literal'='{ "namespace": "org.apache.hive",
+  "name": "first_schema",
+  "type": "record",
+  "fields": [
+    { "name":"string1", "type":"string" },
+    { "name":"string2", "type":"string" }
+  ] }')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro2
+PREHOOK: query: DESCRIBE avro2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro2
+POSTHOOK: query: DESCRIBE avro2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro2
+string1 string from deserializer
+string2 string from deserializer
+PREHOOK: query: ALTER TABLE avro2 SET TBLPROPERTIES ('avro.schema.literal'='{ "namespace": "org.apache.hive",
+  "name": "second_schema",
+  "type": "record",
+  "fields": [
+    { "name":"int1", "type":"int" },
+    { "name":"float1", "type":"float" },
+    { "name":"double1", "type":"double" }
+  ] }')
+PREHOOK: type: ALTERTABLE_PROPERTIES
+PREHOOK: Input: default@avro2
+PREHOOK: Output: default@avro2
+POSTHOOK: query: ALTER TABLE avro2 SET TBLPROPERTIES ('avro.schema.literal'='{ "namespace": "org.apache.hive",
+  "name": "second_schema",
+  "type": "record",
+  "fields": [
+    { "name":"int1", "type":"int" },
+    { "name":"float1", "type":"float" },
+    { "name":"double1", "type":"double" }
+  ] }')
+POSTHOOK: type: ALTERTABLE_PROPERTIES
+POSTHOOK: Input: default@avro2
+POSTHOOK: Output: default@avro2
+PREHOOK: query: DESCRIBE avro2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro2
+POSTHOOK: query: DESCRIBE avro2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro2
+int1 int from deserializer
+float1 float from deserializer
+double1 double from deserializer
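avro_change_schema demonstrates the defining quirk of AvroSerDe tables: the live column set is whatever avro.schema.literal (or avro.schema.url) currently says, so a single ALTER ... SET TBLPROPERTIES swaps every column at once. A sketch of the pattern, using a hypothetical events table:

-- Columns come from the JSON literal, not from a column list in the DDL;
-- DESCRIBE afterwards reports: id bigint, payload string.
ALTER TABLE events SET TBLPROPERTIES ('avro.schema.literal'='{
  "namespace": "org.apache.hive", "name": "events_v2", "type": "record",
  "fields": [ { "name": "id", "type": "long" },
              { "name": "payload", "type": "string" } ] }');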
diff --git a/ql/src/test/results/clientpositive/spark/avro_compression_enabled.q.out b/ql/src/test/results/clientpositive/spark/avro_compression_enabled.q.out
new file mode 100644
index 0000000..bcb967a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/avro_compression_enabled.q.out
@@ -0,0 +1,95 @@
+PREHOOK: query: -- verify that new joins bring in correct schemas (including evolved schemas)
+
+CREATE TABLE doctors4
+ROW FORMAT
+SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS
+INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES ('avro.schema.literal'='{
+  "namespace": "testing.hive.avro.serde",
+  "name": "doctors",
+  "type": "record",
+  "fields": [
+    {
+      "name":"number",
+      "type":"int",
+      "doc":"Order of playing the role"
+    },
+    {
+      "name":"first_name",
+      "type":"string",
+      "doc":"first name of actor playing role"
+    },
+    {
+      "name":"last_name",
+      "type":"string",
+      "doc":"last name of actor playing role"
+    },
+    {
+      "name":"extra_field",
+      "type":"string",
+      "doc:":"an extra field not in the original file",
+      "default":"fishfingers and custard"
+    }
+  ]
+}')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@doctors4
+POSTHOOK: query: -- verify that new joins bring in correct schemas (including evolved schemas)
+
+CREATE TABLE doctors4
+ROW FORMAT
+SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS
+INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES ('avro.schema.literal'='{
+  "namespace": "testing.hive.avro.serde",
+  "name": "doctors",
+  "type": "record",
+  "fields": [
+    {
+      "name":"number",
+      "type":"int",
+      "doc":"Order of playing the role"
+    },
+    {
+      "name":"first_name",
+      "type":"string",
+      "doc":"first name of actor playing role"
+    },
+    {
+      "name":"last_name",
+      "type":"string",
+      "doc":"last name of actor playing role"
+    },
+    {
+      "name":"extra_field",
+      "type":"string",
+      "doc:":"an extra field not in the original file",
+      "default":"fishfingers and custard"
+    }
+  ]
+}')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@doctors4
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@doctors4
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@doctors4
+PREHOOK: query: select count(*) from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+500
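avro_compression_enabled only asserts a row count in its golden output, but the point of the test is the session it runs under. A hedged sketch of how Avro output compression is usually toggled; treat avro.output.codec (a deflate-by-default knob I recall AvroContainerOutputFormat honoring) as my assumption, not something this diff shows:

SET hive.exec.compress.output=true;  -- the switch the .q file presumably flips
SET avro.output.codec=deflate;       -- assumed codec property; not visible in this diff
-- doctors4_compressed is hypothetical; its data files come out compressed
INSERT OVERWRITE TABLE doctors4_compressed SELECT * FROM doctors4;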
diff --git a/ql/src/test/results/clientpositive/spark/avro_compression_enabled_native.q.out b/ql/src/test/results/clientpositive/spark/avro_compression_enabled_native.q.out
new file mode 100644
index 0000000..687f17e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/avro_compression_enabled_native.q.out
@@ -0,0 +1,39 @@
+PREHOOK: query: -- verify that new joins bring in correct schemas (including evolved schemas)
+
+CREATE TABLE doctors4 (
+  number int,
+  first_name string,
+  last_name string,
+  extra_field string)
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@doctors4
+POSTHOOK: query: -- verify that new joins bring in correct schemas (including evolved schemas)
+
+CREATE TABLE doctors4 (
+  number int,
+  first_name string,
+  last_name string,
+  extra_field string)
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@doctors4
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@doctors4
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@doctors4
+PREHOOK: query: SELECT count(*) FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT count(*) FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+500
diff --git a/ql/src/test/results/clientpositive/spark/avro_decimal.q.out b/ql/src/test/results/clientpositive/spark/avro_decimal.q.out
new file mode 100644
index 0000000..7d9a77e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/avro_decimal.q.out
@@ -0,0 +1,208 @@
+PREHOOK: query: DROP TABLE IF EXISTS dec
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS dec
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE dec(name string, value decimal(8,4))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dec
+POSTHOOK: query: CREATE TABLE dec(name string, value decimal(8,4))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dec
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.txt' into TABLE dec
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@dec
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.txt' into TABLE dec
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@dec
+PREHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dec
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dec
+#### A masked pattern was here ####
+PREHOOK: query: DESC FORMATTED dec value
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@dec
+POSTHOOK: query: DESC FORMATTED dec value
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@dec
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+value decimal(8,4) -12.25 234.79 0 6 from deserializer
+PREHOOK: query: DROP TABLE IF EXISTS avro_dec
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS avro_dec
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE `avro_dec`(
+  `name` string COMMENT 'from deserializer',
+  `value` decimal(5,2) COMMENT 'from deserializer')
+COMMENT 'just drop the schema right into the HQL'
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES (
+  'numFiles'='1',
+  'avro.schema.literal'='{\"namespace\":\"com.howdy\",\"name\":\"some_schema\",\"type\":\"record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":5,\"scale\":2}}]}'
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_dec
+POSTHOOK: query: CREATE TABLE `avro_dec`(
+  `name` string COMMENT 'from deserializer',
+  `value` decimal(5,2) COMMENT 'from deserializer')
+COMMENT 'just drop the schema right into the HQL'
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES (
+  'numFiles'='1',
+  'avro.schema.literal'='{\"namespace\":\"com.howdy\",\"name\":\"some_schema\",\"type\":\"record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":5,\"scale\":2}}]}'
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_dec
+PREHOOK: query: DESC avro_dec
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_dec
+POSTHOOK: query: DESC avro_dec
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_dec
+name string from deserializer
+value decimal(5,2) from deserializer
+PREHOOK: query: INSERT OVERWRITE TABLE avro_dec select name, value from dec
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dec
+PREHOOK: Output: default@avro_dec
+[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
+POSTHOOK: query: INSERT OVERWRITE TABLE avro_dec select name, value from dec
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dec
+POSTHOOK: Output: default@avro_dec
+POSTHOOK: Lineage: avro_dec.name SIMPLE [(dec)dec.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: avro_dec.value EXPRESSION [(dec)dec.FieldSchema(name:value, type:decimal(8,4), comment:null), ]
+PREHOOK: query: SELECT * FROM avro_dec
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_dec
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_dec
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_dec
+#### A masked pattern was here ####
+Tom 234.79
+Beck 77.34
+Snow 55.71
+Mary 4.33
+Cluck 5.96
+Tom -12.25
+Mary 33.33
+Tom 19
+Beck 0
+Beck 79.9
+PREHOOK: query: DROP TABLE IF EXISTS avro_dec1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS avro_dec1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE `avro_dec1`(
+  `name` string COMMENT 'from deserializer',
+  `value` decimal(4,1) COMMENT 'from deserializer')
+COMMENT 'just drop the schema right into the HQL'
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES (
+  'numFiles'='1',
+  'avro.schema.literal'='{\"namespace\":\"com.howdy\",\"name\":\"some_schema\",\"type\":\"record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":4,\"scale\":1}}]}'
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_dec1
+POSTHOOK: query: CREATE TABLE `avro_dec1`(
+  `name` string COMMENT 'from deserializer',
+  `value` decimal(4,1) COMMENT 'from deserializer')
+COMMENT 'just drop the schema right into the HQL'
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES (
+  'numFiles'='1',
+  'avro.schema.literal'='{\"namespace\":\"com.howdy\",\"name\":\"some_schema\",\"type\":\"record\",\"fields\":[{\"name\":\"name\",\"type\":\"string\"},{\"name\":\"value\",\"type\":{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":4,\"scale\":1}}]}'
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_dec1
+PREHOOK: query: DESC avro_dec1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_dec1
+POSTHOOK: query: DESC avro_dec1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_dec1
+name string from deserializer
+value decimal(4,1) from deserializer
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.avro' into TABLE avro_dec1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@avro_dec1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.avro' into TABLE avro_dec1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@avro_dec1
+PREHOOK: query: select value from avro_dec1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_dec1
+#### A masked pattern was here ####
+POSTHOOK: query: select value from avro_dec1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_dec1
+#### A masked pattern was here ####
+234.8
+77.3
+55.7
+4.3
+6
+12.3
+33.3
+19
+3.2
+79.9
+PREHOOK: query: DROP TABLE dec
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dec
+PREHOOK: Output: default@dec
+POSTHOOK: query: DROP TABLE dec
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dec
+POSTHOOK: Output: default@dec
+PREHOOK: query: DROP TABLE avro_dec
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@avro_dec
+PREHOOK: Output: default@avro_dec
+POSTHOOK: query: DROP TABLE avro_dec
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@avro_dec
+POSTHOOK: Output: default@avro_dec
+PREHOOK: query: DROP TABLE avro_dec1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@avro_dec1
+PREHOOK: Output: default@avro_dec1
+POSTHOOK: query: DROP TABLE avro_dec1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@avro_dec1
+POSTHOOK: Output: default@avro_dec1
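The decimal tests pin down Hive's mapping of decimal(p,s) onto Avro's bytes column with the decimal logicalType, visible in the schema literals above. Note the scale narrowing: dec holds decimal(8,4) values, avro_dec only decimal(5,2), and avro_dec1 reads a file written at scale 1, which is why 234.79 surfaces as 234.8. A small sketch of the same narrowing (dec_demo is made up; dec is the test's own table):

CREATE TABLE dec_demo (value decimal(4,1)) STORED AS AVRO;
INSERT OVERWRITE TABLE dec_demo SELECT CAST(value AS decimal(4,1)) FROM dec;
SELECT value FROM dec_demo;  -- 234.79 comes back as 234.8, matching avro_dec1 above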
diff --git a/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out b/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out
new file mode 100644
index 0000000..58a4f5c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/avro_decimal_native.q.out
@@ -0,0 +1,176 @@
+PREHOOK: query: DROP TABLE IF EXISTS dec
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS dec
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE dec (
+  name string,
+  value decimal(8,4))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dec
+POSTHOOK: query: CREATE TABLE dec (
+  name string,
+  value decimal(8,4))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dec
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.txt' into TABLE dec
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@dec
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.txt' into TABLE dec
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@dec
+PREHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dec
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dec
+#### A masked pattern was here ####
+PREHOOK: query: DESC FORMATTED dec value
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@dec
+POSTHOOK: query: DESC FORMATTED dec value
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@dec
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+value decimal(8,4) -12.25 234.79 0 6 from deserializer
+PREHOOK: query: DROP TABLE IF EXISTS avro_dec
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS avro_dec
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE avro_dec(
+  name string,
+  value decimal(5,2))
+COMMENT 'just drop the schema right into the HQL'
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_dec
+POSTHOOK: query: CREATE TABLE avro_dec(
+  name string,
+  value decimal(5,2))
+COMMENT 'just drop the schema right into the HQL'
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_dec
+PREHOOK: query: DESC avro_dec
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_dec
+POSTHOOK: query: DESC avro_dec
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_dec
+name string from deserializer
+value decimal(5,2) from deserializer
+PREHOOK: query: INSERT OVERWRITE TABLE avro_dec SELECT name, value FROM dec
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dec
+PREHOOK: Output: default@avro_dec
+[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to
+POSTHOOK: query: INSERT OVERWRITE TABLE avro_dec SELECT name, value FROM dec
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dec
+POSTHOOK: Output: default@avro_dec
+POSTHOOK: Lineage: avro_dec.name SIMPLE [(dec)dec.FieldSchema(name:name, type:string, comment:null), ]
+POSTHOOK: Lineage: avro_dec.value EXPRESSION [(dec)dec.FieldSchema(name:value, type:decimal(8,4), comment:null), ]
+PREHOOK: query: SELECT * FROM avro_dec
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_dec
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM avro_dec
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_dec
+#### A masked pattern was here ####
+Tom 234.79
+Beck 77.34
+Snow 55.71
+Mary 4.33
+Cluck 5.96
+Tom -12.25
+Mary 33.33
+Tom 19
+Beck 0
+Beck 79.9
+PREHOOK: query: DROP TABLE IF EXISTS avro_dec1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS avro_dec1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE avro_dec1(
+  name string,
+  value decimal(4,1))
+COMMENT 'just drop the schema right into the HQL'
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@avro_dec1
+POSTHOOK: query: CREATE TABLE avro_dec1(
+  name string,
+  value decimal(4,1))
+COMMENT 'just drop the schema right into the HQL'
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@avro_dec1
+PREHOOK: query: DESC avro_dec1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@avro_dec1
+POSTHOOK: query: DESC avro_dec1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@avro_dec1
+name string from deserializer
+value decimal(4,1) from deserializer
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.avro' INTO TABLE avro_dec1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@avro_dec1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.avro' INTO TABLE avro_dec1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@avro_dec1
+PREHOOK: query: SELECT value FROM avro_dec1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@avro_dec1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT value FROM avro_dec1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@avro_dec1
+#### A masked pattern was here ####
+234.8
+77.3
+55.7
+4.3
+6
+12.3
+33.3
+19
+3.2
+79.9
+PREHOOK: query: DROP TABLE dec
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dec
+PREHOOK: Output: default@dec
+POSTHOOK: query: DROP TABLE dec
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dec
+POSTHOOK: Output: default@dec
+PREHOOK: query: DROP TABLE avro_dec
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@avro_dec
+PREHOOK: Output: default@avro_dec
+POSTHOOK: query: DROP TABLE avro_dec
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@avro_dec
+POSTHOOK: Output: default@avro_dec
+PREHOOK: query: DROP TABLE avro_dec1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@avro_dec1
+PREHOOK: Output: default@avro_dec1
+POSTHOOK: query: DROP TABLE avro_dec1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@avro_dec1
+POSTHOOK: Output: default@avro_dec1
diff --git a/ql/src/test/results/clientpositive/spark/avro_evolved_schemas.q.out b/ql/src/test/results/clientpositive/spark/avro_evolved_schemas.q.out
new file mode 100644
index 0000000..2f81b1a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/avro_evolved_schemas.q.out
@@ -0,0 +1,117 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+-- verify that new fields in schema get propagated to table scans
+CREATE TABLE doctors_with_new_field
+ROW FORMAT
+SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS
+INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES ('avro.schema.literal'='{
+  "namespace": "testing.hive.avro.serde",
+  "name": "doctors",
+  "type": "record",
+  "fields": [
+    {
+      "name":"number",
+      "type":"int",
+      "doc":"Order of playing the role"
+    },
+    {
+      "name":"first_name",
+      "type":"string",
+      "doc":"first name of actor playing role"
+    },
+    {
+      "name":"last_name",
+      "type":"string",
+      "doc":"last name of actor playing role"
+    },
+    {
+      "name":"extra_field",
+      "type":"string",
+      "doc:":"an extra field not in the original file",
+      "default":"fishfingers and custard"
+    }
+  ]
+}')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@doctors_with_new_field
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+-- verify that new fields in schema get propagated to table scans
+CREATE TABLE doctors_with_new_field
+ROW FORMAT
+SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS
+INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+TBLPROPERTIES ('avro.schema.literal'='{
+  "namespace": "testing.hive.avro.serde",
+  "name": "doctors",
+  "type": "record",
+  "fields": [
+    {
+      "name":"number",
+      "type":"int",
+      "doc":"Order of playing the role"
+    },
+    {
+      "name":"first_name",
+      "type":"string",
+      "doc":"first name of actor playing role"
+    },
+    {
+      "name":"last_name",
+      "type":"string",
+      "doc":"last name of actor playing role"
+    },
+    {
+      "name":"extra_field",
+      "type":"string",
+      "doc:":"an extra field not in the original file",
+      "default":"fishfingers and custard"
+    }
+  ]
+}')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@doctors_with_new_field
+PREHOOK: query: DESCRIBE doctors_with_new_field
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@doctors_with_new_field
+POSTHOOK: query: DESCRIBE doctors_with_new_field
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@doctors_with_new_field
+number int from deserializer
+first_name string from deserializer
+last_name string from deserializer
+extra_field string from deserializer
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors_with_new_field
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@doctors_with_new_field
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors_with_new_field
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@doctors_with_new_field
+PREHOOK: query: SELECT * FROM doctors_with_new_field
+PREHOOK: type: QUERY
+PREHOOK: Input: default@doctors_with_new_field
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM doctors_with_new_field
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@doctors_with_new_field
+#### A masked pattern was here ####
+1 William Hartnell fishfingers and custard
+10 David Tennant fishfingers and custard
+11 Matt Smith fishfingers and custard
+2 Patrick Troughton fishfingers and custard
+3 Jon Pertwee fishfingers and custard
+4 Tom Baker fishfingers and custard
+5 Peter Davison fishfingers and custard
+6 Colin Baker fishfingers and custard
+7 Sylvester McCoy fishfingers and custard
+8 Paul McGann fishfingers and custard
+9 Christopher Eccleston fishfingers and custard
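The extra_field/default pair in this schema is Avro schema resolution at work: a reader field missing from a file's writer schema is filled from its default, which is why every row above ends in "fishfingers and custard". (The stray colon in the "doc:" key is in the checked-in .q file itself, so the goldens reproduce it.) The same effect in two statements, assuming the table from this test and a trimmed two-field schema:

ALTER TABLE doctors_with_new_field SET TBLPROPERTIES ('avro.schema.literal'='{
  "namespace": "testing.hive.avro.serde", "name": "doctors", "type": "record",
  "fields": [
    { "name": "number", "type": "int" },
    { "name": "extra_field", "type": "string", "default": "fishfingers and custard" }
  ] }');
SELECT extra_field FROM doctors_with_new_field LIMIT 1;  -- default supplied for files lacking the field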
diff --git a/ql/src/test/results/clientpositive/spark/avro_native.q.out b/ql/src/test/results/clientpositive/spark/avro_native.q.out
new file mode 100644
index 0000000..2de4df1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/spark/avro_native.q.out
@@ -0,0 +1,58 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+-- verify that we can actually read avro files
+CREATE TABLE doctors (
+  number int,
+  first_name string,
+  last_name string)
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@doctors
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+-- verify that we can actually read avro files
+CREATE TABLE doctors (
+  number int,
+  first_name string,
+  last_name string)
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@doctors
+PREHOOK: query: DESCRIBE doctors
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@doctors
+POSTHOOK: query: DESCRIBE doctors
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@doctors
+number int from deserializer
+first_name string from deserializer
+last_name string from deserializer
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@doctors
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@doctors
+PREHOOK: query: SELECT * FROM doctors
+PREHOOK: type: QUERY
+PREHOOK: Input: default@doctors
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM doctors
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@doctors
+#### A masked pattern was here ####
+1 William Hartnell
+10 David Tennant
+11 Matt Smith
+2 Patrick Troughton
+3 Jon Pertwee
+4 Tom Baker
+5 Peter Davison
+6 Colin Baker
+7 Sylvester McCoy
+8 Paul McGann
+9 Christopher Eccleston
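The *_native variants throughout this patch rely on the STORED AS AVRO shorthand (HIVE-6806, if memory serves), which expands to the same SerDe/InputFormat/OutputFormat triple the verbose files spell out, with the Avro schema derived from the Hive columns instead of a literal. Roughly, these two DDLs yield equivalent tables (t_native and t_verbose are illustrative names):

CREATE TABLE t_native (c int) STORED AS AVRO;

CREATE TABLE t_verbose (c int)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';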
"sString", "type": "string" } + ]}] }, + { "name": "enum1", "type": ["null", {"type": "enum", "name": "enum1_values", "symbols": ["BLUE", "RED", "GREEN"]}] }, + { "name": "nullableint", "type": ["null", "int"] }, + { "name": "bytes1", "type": ["null", "bytes"] }, + { "name": "fixed1", "type": ["null", {"type": "fixed", "name": "threebytes", "size": 3}] } + ] + }' + ) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@as_avro +POSTHOOK: query: CREATE TABLE as_avro + ROW FORMAT + SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + STORED AS + INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + TBLPROPERTIES ( + 'avro.schema.literal'='{ + "namespace": "com.howdy", + "name": "some_schema", + "type": "record", + "fields": [ + { "name": "string1", "type": ["null", "string"] }, + { "name": "int1", "type": ["null", "int"] }, + { "name": "tinyint1", "type": ["null", "int"] }, + { "name": "smallint1", "type": ["null", "int"] }, + { "name": "bigint1", "type": ["null", "long"] }, + { "name": "boolean1", "type": ["null", "boolean"] }, + { "name": "float1", "type": ["null", "float"] }, + { "name": "double1", "type": ["null", "double"] }, + { "name": "list1", "type": ["null", {"type": "array", "items": "string"}] }, + { "name": "map1", "type": ["null", {"type": "map", "values": "int"}] }, + { "name": "struct1", "type": ["null", {"type": "record", "name": "struct1_name", "fields": [ + { "name": "sInt", "type": "int" }, + { "name": "sBoolean", "type": "boolean" }, + { "name": "sString", "type": "string" } + ]}] }, + { "name": "enum1", "type": ["null", {"type": "enum", "name": "enum1_values", "symbols": ["BLUE", "RED", "GREEN"]}] }, + { "name": "nullableint", "type": ["null", "int"] }, + { "name": "bytes1", "type": ["null", "bytes"] }, + { "name": "fixed1", "type": ["null", {"type": "fixed", "name": "threebytes", "size": 3}] } + ] + }' + ) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@as_avro +PREHOOK: query: INSERT OVERWRITE TABLE as_avro SELECT * FROM test_serializer +PREHOOK: type: QUERY +PREHOOK: Input: default@test_serializer +PREHOOK: Output: default@as_avro +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE as_avro SELECT * FROM test_serializer +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_serializer +POSTHOOK: Output: default@as_avro +POSTHOOK: Lineage: as_avro.bigint1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bigint1, type:bigint, comment:null), ] +POSTHOOK: Lineage: as_avro.boolean1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:boolean1, type:boolean, comment:null), ] +POSTHOOK: Lineage: as_avro.bytes1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bytes1, type:binary, comment:null), ] +POSTHOOK: Lineage: as_avro.double1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:double1, type:double, comment:null), ] +POSTHOOK: Lineage: as_avro.enum1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:enum1, type:string, comment:null), ] +POSTHOOK: Lineage: as_avro.fixed1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:fixed1, type:binary, comment:null), ] +POSTHOOK: Lineage: as_avro.float1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:float1, type:float, comment:null), ] 
+POSTHOOK: Lineage: as_avro.int1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:int1, type:int, comment:null), ]
+POSTHOOK: Lineage: as_avro.list1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:list1, type:array<string>, comment:null), ]
+POSTHOOK: Lineage: as_avro.map1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:map1, type:map<string,int>, comment:null), ]
+POSTHOOK: Lineage: as_avro.nullableint SIMPLE [(test_serializer)test_serializer.FieldSchema(name:nullableint, type:int, comment:null), ]
+POSTHOOK: Lineage: as_avro.smallint1 EXPRESSION [(test_serializer)test_serializer.FieldSchema(name:smallint1, type:smallint, comment:null), ]
+POSTHOOK: Lineage: as_avro.string1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:string1, type:string, comment:null), ]
+POSTHOOK: Lineage: as_avro.struct1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:struct1, type:struct<sint:int,sboolean:boolean,sstring:string>, comment:null), ]
+POSTHOOK: Lineage: as_avro.tinyint1 EXPRESSION [(test_serializer)test_serializer.FieldSchema(name:tinyint1, type:tinyint, comment:null), ]
+PREHOOK: query: SELECT * FROM as_avro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@as_avro
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM as_avro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@as_avro
+#### A masked pattern was here ####
+why hello there 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc
+another record 98 4 101 9999999 false 99.89 9.0E-8 ["beta"] {"Earth":101} {"sint":1134,"sboolean":false,"sstring":"wazzup"} RED NULL  ef
+third record 45 5 102 999999999 true 89.99 9.0E-14 ["alpha","gamma"] {"Earth":237,"Bob":723} {"sint":102,"sboolean":false,"sstring":"BNL"} GREEN NULL  hi
+NULL 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc
+string NULL 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc
+string 42 NULL 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc
+string 42 3 NULL 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc
+string 42 3 100 NULL true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc
+string 42 3 100 1412341 NULL 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc
+string 42 3 100 1412341 true NULL 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc
+string 42 3 100 1412341 true 42.43 NULL ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc
+string 42 3 100 1412341 true 42.43 85.23423424 NULL {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc
+string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] NULL {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  bc
+string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} NULL BLUE 72  bc
{"Earth":42,"Bob":31,"Control":86} NULL BLUE 72  bc +string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} NULL 72  bc +string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE NULL  bc +string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 NULL bc +string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72  NULL diff --git a/ql/src/test/results/clientpositive/spark/avro_partitioned.q.out b/ql/src/test/results/clientpositive/spark/avro_partitioned.q.out new file mode 100644 index 0000000..e7c3b5f --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/avro_partitioned.q.out @@ -0,0 +1,483 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- Verify that table scans work with partitioned Avro tables +CREATE TABLE episodes +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@episodes +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- Verify that table scans work with partitioned Avro tables +CREATE TABLE episodes +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@episodes +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@episodes +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@episodes +PREHOOK: query: CREATE TABLE episodes_partitioned +PARTITIONED BY (doctor_pt INT) +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + 
"name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@episodes_partitioned +POSTHOOK: query: CREATE TABLE episodes_partitioned +PARTITIONED BY (doctor_pt INT) +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@episodes_partitioned +PREHOOK: query: INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes +PREHOOK: Output: default@episodes_partitioned +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=1 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=11 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=2 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=4 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=5 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=6 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=9 +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: 
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ]
+PREHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6
+PREHOOK: type: QUERY
+PREHOOK: Input: default@episodes_partitioned
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=11
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=9
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@episodes_partitioned
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11
+POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9
+#### A masked pattern was here ####
+Rose 26 March 2005 9 9
+The Doctor's Wife 14 May 2011 11 11
+The Eleventh Hour 3 April 2010 11 11
+PREHOOK: query: -- Verify that Fetch works in addition to Map
+SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@episodes_partitioned
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=1
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=11
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=2
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=4
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=5
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=6
+PREHOOK: Input: default@episodes_partitioned@doctor_pt=9
+#### A masked pattern was here ####
+POSTHOOK: query: -- Verify that Fetch works in addition to Map
+SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5
episodes_partitioned ORDER BY air_date LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=1 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=2 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=4 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=5 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +An Unearthly Child 23 November 1963 1 1 +Horror of Fang Rock 3 September 1977 4 4 +Rose 26 March 2005 9 9 +The Doctor's Wife 14 May 2011 11 11 +The Eleventh Hour 3 April 2010 11 11 +PREHOOK: query: -- Fetch w/filter to specific partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Input: default@episodes_partitioned@doctor_pt=6 +#### A masked pattern was here #### +POSTHOOK: query: -- Fetch w/filter to specific partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6 +#### A masked pattern was here #### +The Mysterious Planet 6 September 1986 6 6 +PREHOOK: query: -- Fetch w/non-existent partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +#### A masked pattern was here #### +POSTHOOK: query: -- Fetch w/non-existent partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +#### A masked pattern was here #### +PREHOOK: query: -- Alter table add an empty partition +ALTER TABLE episodes_partitioned ADD PARTITION (doctor_pt=7) +PREHOOK: type: ALTERTABLE_ADDPARTS +PREHOOK: Output: default@episodes_partitioned +POSTHOOK: query: -- Alter table add an empty partition +ALTER TABLE episodes_partitioned ADD PARTITION (doctor_pt=7) +POSTHOOK: type: ALTERTABLE_ADDPARTS +POSTHOOK: Output: default@episodes_partitioned +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=7 +PREHOOK: query: SELECT COUNT(*) FROM episodes_partitioned +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Input: default@episodes_partitioned@doctor_pt=1 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=11 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=2 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=4 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=5 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=6 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=7 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(*) FROM episodes_partitioned +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=1 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=2 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=4 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=5 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=7 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +8 +PREHOOK: query: -- Verify 
that reading from an Avro partition works +-- even if it has an old schema relative to the current table level schema + +-- Create table and store schema in SERDEPROPERTIES +CREATE TABLE episodes_partitioned_serdeproperties +PARTITIONED BY (doctor_pt INT) +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH SERDEPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@episodes_partitioned_serdeproperties +POSTHOOK: query: -- Verify that reading from an Avro partition works +-- even if it has an old schema relative to the current table level schema + +-- Create table and store schema in SERDEPROPERTIES +CREATE TABLE episodes_partitioned_serdeproperties +PARTITIONED BY (doctor_pt INT) +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH SERDEPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@episodes_partitioned_serdeproperties +PREHOOK: query: -- Insert data into a partition +INSERT INTO TABLE episodes_partitioned_serdeproperties PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes +PREHOOK: Output: default@episodes_partitioned_serdeproperties +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: -- Insert data into a partition +INSERT INTO TABLE episodes_partitioned_serdeproperties PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes +POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=1 +POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=11 +POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=2 +POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=4 +POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=5 +POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=6 +POSTHOOK: Output: default@episodes_partitioned_serdeproperties@doctor_pt=9 +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties 
PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned_serdeproperties PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, 
comment:from deserializer), ] +PREHOOK: query: -- Evolve the table schema by adding new array field "cast_and_crew" +ALTER TABLE episodes_partitioned_serdeproperties +SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH SERDEPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"cast_and_crew", + "type":{"type":"array","items":"string"}, + "default":[] + }, + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@episodes_partitioned_serdeproperties +PREHOOK: Output: default@episodes_partitioned_serdeproperties +POSTHOOK: query: -- Evolve the table schema by adding new array field "cast_and_crew" +ALTER TABLE episodes_partitioned_serdeproperties +SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH SERDEPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"cast_and_crew", + "type":{"type":"array","items":"string"}, + "default":[] + }, + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + } + ] +}') +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@episodes_partitioned_serdeproperties +POSTHOOK: Output: default@episodes_partitioned_serdeproperties +PREHOOK: query: -- Try selecting from the evolved table +SELECT * FROM episodes_partitioned_serdeproperties +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned_serdeproperties +PREHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=1 +PREHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=11 +PREHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=2 +PREHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=4 +PREHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=5 +PREHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=6 +PREHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=9 +#### A masked pattern was here #### +POSTHOOK: query: -- Try selecting from the evolved table +SELECT * FROM episodes_partitioned_serdeproperties +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned_serdeproperties +POSTHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=1 +POSTHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=11 +POSTHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=2 +POSTHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=4 +POSTHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=5 +POSTHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=6 +POSTHOOK: Input: default@episodes_partitioned_serdeproperties@doctor_pt=9 +#### A masked pattern was here #### +[] An Unearthly Child 23 November 1963 1 1 +[] Castrolava 4 January 1982 5 5 +[] Horror of Fang Rock 3 September 1977 4 4 +[] Rose 26 March 2005 9 9 +[] The Doctor's Wife 14 May 2011 11 11 +[] The Eleventh Hour 3 April 2010 11 11 +[] The Mysterious Planet 6 September 1986 6 6 +[] The Power of the Daleks 5 November 1966 2 2 
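Note: the episodes_partitioned_serdeproperties output above is the canonical Avro schema-evolution pattern: partitions written under an older, narrower writer schema stay readable after the table-level reader schema gains a field, provided the new field declares a "default" (hence the empty array [] leading every row of the final SELECT). A minimal sketch of the same pattern outside the test harness follows; the table name events and its fields are illustrative, not part of the test suite.

-- Hypothetical table; names are illustrative only.
CREATE TABLE events
PARTITIONED BY (dt STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
WITH SERDEPROPERTIES ('avro.schema.literal'='{
  "namespace": "example", "name": "events", "type": "record",
  "fields": [ { "name":"msg", "type":"string" } ]
}')
STORED AS
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat';

-- ... load or insert data into one or more partitions here ...

-- Widen the reader schema. The "default" lets rows written with the
-- old writer schema still deserialize; the default value ([]) is
-- returned for the field missing from old partitions.
ALTER TABLE events
SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
WITH SERDEPROPERTIES ('avro.schema.literal'='{
  "namespace": "example", "name": "events", "type": "record",
  "fields": [
    { "name":"tags", "type":{"type":"array","items":"string"}, "default":[] },
    { "name":"msg",  "type":"string" }
  ]
}');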
diff --git a/ql/src/test/results/clientpositive/spark/avro_partitioned_native.q.out b/ql/src/test/results/clientpositive/spark/avro_partitioned_native.q.out new file mode 100644 index 0000000..311de4c --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/avro_partitioned_native.q.out @@ -0,0 +1,151 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- Verify that table scans work with partitioned Avro tables +CREATE TABLE episodes ( + title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@episodes +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- Verify that table scans work with partitioned Avro tables +CREATE TABLE episodes ( + title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@episodes +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@episodes +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@episodes +PREHOOK: query: CREATE TABLE episodes_partitioned ( + title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +PARTITIONED BY (doctor_pt INT) +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@episodes_partitioned +POSTHOOK: query: CREATE TABLE episodes_partitioned ( + title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +PARTITIONED BY (doctor_pt INT) +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@episodes_partitioned +PREHOOK: query: INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt) +SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes +PREHOOK: Output: default@episodes_partitioned +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt) +SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=1 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=11 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=2 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=4 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=5 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=6 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=9 +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, 
comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +PREHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Input: default@episodes_partitioned@doctor_pt=11 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +Rose 26 March 2005 9 9 +The Doctor's Wife 14 May 2011 11 11 +The Eleventh Hour 3 April 2010 11 11 +PREHOOK: query: -- Verify that Fetch works in addition to Map +SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Input: default@episodes_partitioned@doctor_pt=1 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=11 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=2 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=4 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=5 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=6 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +POSTHOOK: query: -- Verify that Fetch works in addition to Map +SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=1 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=2 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=4 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=5 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +An Unearthly Child 23 November 1963 1 1 +Horror of Fang Rock 3 September 1977 4 4 +Rose 26 March 2005 9 9 +The Doctor's Wife 14 May 2011 11 11 +The Eleventh Hour 3 April 2010 11 11 +PREHOOK: query: -- Fetch w/filter to specific partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Input: default@episodes_partitioned@doctor_pt=6 +#### A masked pattern was here #### +POSTHOOK: query: -- Fetch w/filter to specific partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6 +#### A masked pattern was here #### +The Mysterious Planet 6 September 1986 6 6 +PREHOOK: query: -- Fetch w/non-existent partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +#### A masked pattern was here #### +POSTHOOK: query: -- Fetch w/non-existent partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/avro_sanity_test.q.out b/ql/src/test/results/clientpositive/spark/avro_sanity_test.q.out new file mode 100644 index 0000000..687579e --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/avro_sanity_test.q.out @@ -0,0 +1,104 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- verify that we can actually read avro files +CREATE TABLE doctors +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "doctors", 
+ "type": "record", + "fields": [ + { + "name":"number", + "type":"int", + "doc":"Order of playing the role" + }, + { + "name":"first_name", + "type":"string", + "doc":"first name of actor playing role" + }, + { + "name":"last_name", + "type":"string", + "doc":"last name of actor playing role" + } + ] +}') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@doctors +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- verify that we can actually read avro files +CREATE TABLE doctors +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "doctors", + "type": "record", + "fields": [ + { + "name":"number", + "type":"int", + "doc":"Order of playing the role" + }, + { + "name":"first_name", + "type":"string", + "doc":"first name of actor playing role" + }, + { + "name":"last_name", + "type":"string", + "doc":"last name of actor playing role" + } + ] +}') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@doctors +PREHOOK: query: DESCRIBE doctors +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@doctors +POSTHOOK: query: DESCRIBE doctors +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@doctors +number int from deserializer +first_name string from deserializer +last_name string from deserializer +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@doctors +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@doctors +PREHOOK: query: SELECT * FROM doctors +PREHOOK: type: QUERY +PREHOOK: Input: default@doctors +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM doctors +POSTHOOK: type: QUERY +POSTHOOK: Input: default@doctors +#### A masked pattern was here #### +1 William Hartnell +10 David Tennant +11 Matt Smith +2 Patrick Troughton +3 Jon Pertwee +4 Tom Baker +5 Peter Davison +6 Colin Baker +7 Sylvester McCoy +8 Paul McGann +9 Christopher Eccleston diff --git a/ql/src/test/results/clientpositive/spark/avro_schema_error_message.q.out b/ql/src/test/results/clientpositive/spark/avro_schema_error_message.q.out new file mode 100644 index 0000000..967a847 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/avro_schema_error_message.q.out @@ -0,0 +1,35 @@ +PREHOOK: query: -- verify we get the sentinel schema if we don't provide one + +CREATE TABLE avro_with_no_schema +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro_with_no_schema +POSTHOOK: query: -- verify we get the sentinel schema if we don't provide one + +CREATE TABLE avro_with_no_schema +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default 
+POSTHOOK: Output: default@avro_with_no_schema +PREHOOK: query: DESCRIBE avro_with_no_schema +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_with_no_schema +POSTHOOK: query: DESCRIBE avro_with_no_schema +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_with_no_schema +error_error_error_error_error_error_error string from deserializer +cannot_determine_schema string from deserializer +check string from deserializer +schema string from deserializer +url string from deserializer +and string from deserializer +literal string from deserializer diff --git a/ql/src/test/results/clientpositive/spark/avro_schema_evolution_native.q.out b/ql/src/test/results/clientpositive/spark/avro_schema_evolution_native.q.out new file mode 100644 index 0000000..2f434c3 --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/avro_schema_evolution_native.q.out @@ -0,0 +1,219 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- Verify that table scans work with partitioned Avro tables +CREATE TABLE episodes ( + title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@episodes +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- Verify that table scans work with partitioned Avro tables +CREATE TABLE episodes ( + title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@episodes +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@episodes +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@episodes +PREHOOK: query: CREATE TABLE episodes_partitioned ( + title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +PARTITIONED BY (doctor_pt INT) +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@episodes_partitioned +POSTHOOK: query: CREATE TABLE episodes_partitioned ( + title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +PARTITIONED BY (doctor_pt INT) +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@episodes_partitioned +PREHOOK: query: INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt) +SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes +PREHOOK: Output: default@episodes_partitioned +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt) +SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=1 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=11 +POSTHOOK: Output: 
default@episodes_partitioned@doctor_pt=2 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=4 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=5 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=6 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=9 +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE 
[(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +PREHOOK: query: ALTER TABLE episodes_partitioned +SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH +SERDEPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + }, + { + "name":"value", + "type":"int", + "default":0, + "doc":"default value" + } + ] +}') +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Output: default@episodes_partitioned +POSTHOOK: query: ALTER TABLE episodes_partitioned +SET SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +WITH +SERDEPROPERTIES ('avro.schema.literal'='{ + "namespace": "testing.hive.avro.serde", + "name": "episodes", + "type": "record", + "fields": [ + { + "name":"title", + "type":"string", + "doc":"episode title" + }, + { + "name":"air_date", + "type":"string", + "doc":"initial date" + }, + { + "name":"doctor", + "type":"int", + "doc":"main actor playing the Doctor in episode" + }, + { + "name":"value", + "type":"int", + "default":0, + "doc":"default value" + } + ] +}') +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Output: default@episodes_partitioned +PREHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Input: default@episodes_partitioned@doctor_pt=11 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +Rose 26 March 2005 9 0 9 +The Doctor's Wife 14 May 2011 11 0 11 +The Eleventh Hour 3 April 2010 11 0 11 +PREHOOK: query: -- Verify that Fetch works in addition to Map +SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Input: default@episodes_partitioned@doctor_pt=1 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=11 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=2 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=4 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=5 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=6 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +POSTHOOK: query: -- Verify that Fetch works in addition to Map +SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=1 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=2 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=4 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=5 +POSTHOOK: Input: 
default@episodes_partitioned@doctor_pt=6 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +An Unearthly Child 23 November 1963 1 0 1 +Horror of Fang Rock 3 September 1977 4 0 4 +Rose 26 March 2005 9 0 9 +The Doctor's Wife 14 May 2011 11 0 11 +The Eleventh Hour 3 April 2010 11 0 11 +PREHOOK: query: -- Fetch w/filter to specific partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Input: default@episodes_partitioned@doctor_pt=6 +#### A masked pattern was here #### +POSTHOOK: query: -- Fetch w/filter to specific partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6 +#### A masked pattern was here #### +The Mysterious Planet 6 September 1986 6 0 6 +PREHOOK: query: -- Fetch w/non-existent partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +#### A masked pattern was here #### +POSTHOOK: query: -- Fetch w/non-existent partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +#### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/avro_schema_literal.q.out b/ql/src/test/results/clientpositive/spark/avro_schema_literal.q.out new file mode 100644 index 0000000..b13694d --- /dev/null +++ b/ql/src/test/results/clientpositive/spark/avro_schema_literal.q.out @@ -0,0 +1,89 @@ +PREHOOK: query: CREATE TABLE avro1 +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES ('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "big_old_schema", + "type": "record", + "fields": [ + { "name":"string1", "type":"string" }, + { "name":"int1", "type":"int" }, + { "name":"tinyint1", "type":"int" }, + { "name":"smallint1", "type":"int" }, + { "name":"bigint1", "type":"long" }, + { "name":"boolean1", "type":"boolean" }, + { "name":"float1", "type":"float" }, + { "name":"double1", "type":"double" }, + { "name":"list1", "type":{"type":"array", "items":"string"} }, + { "name":"map1", "type":{"type":"map", "values":"int"} }, + { "name":"struct1", "type":{"type":"record", "name":"struct1_name", "fields": [ + { "name":"sInt", "type":"int" }, { "name":"sBoolean", "type":"boolean" }, { "name":"sString", "type":"string" } ] } }, + { "name":"union1", "type":["float", "boolean", "string"] }, + { "name":"enum1", "type":{"type":"enum", "name":"enum1_values", "symbols":["BLUE","RED", "GREEN"]} }, + { "name":"nullableint", "type":["int", "null"] }, + { "name":"bytes1", "type":"bytes" }, + { "name":"fixed1", "type":{"type":"fixed", "name":"threebytes", "size":3} }, + { "name":"dec1", "type":{"type":"bytes", "logicalType":"decimal", "precision":5, "scale":2} } + ] }') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@avro1 +POSTHOOK: query: CREATE TABLE avro1 +ROW FORMAT +SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS +INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +TBLPROPERTIES 
('avro.schema.literal'='{ + "namespace": "org.apache.hive", + "name": "big_old_schema", + "type": "record", + "fields": [ + { "name":"string1", "type":"string" }, + { "name":"int1", "type":"int" }, + { "name":"tinyint1", "type":"int" }, + { "name":"smallint1", "type":"int" }, + { "name":"bigint1", "type":"long" }, + { "name":"boolean1", "type":"boolean" }, + { "name":"float1", "type":"float" }, + { "name":"double1", "type":"double" }, + { "name":"list1", "type":{"type":"array", "items":"string"} }, + { "name":"map1", "type":{"type":"map", "values":"int"} }, + { "name":"struct1", "type":{"type":"record", "name":"struct1_name", "fields": [ + { "name":"sInt", "type":"int" }, { "name":"sBoolean", "type":"boolean" }, { "name":"sString", "type":"string" } ] } }, + { "name":"union1", "type":["float", "boolean", "string"] }, + { "name":"enum1", "type":{"type":"enum", "name":"enum1_values", "symbols":["BLUE","RED", "GREEN"]} }, + { "name":"nullableint", "type":["int", "null"] }, + { "name":"bytes1", "type":"bytes" }, + { "name":"fixed1", "type":{"type":"fixed", "name":"threebytes", "size":3} }, + { "name":"dec1", "type":{"type":"bytes", "logicalType":"decimal", "precision":5, "scale":2} } + ] }') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro1 +PREHOOK: query: DESCRIBE avro1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro1 +POSTHOOK: query: DESCRIBE avro1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro1 +string1 string from deserializer +int1 int from deserializer +tinyint1 int from deserializer +smallint1 int from deserializer +bigint1 bigint from deserializer +boolean1 boolean from deserializer +float1 float from deserializer +double1 double from deserializer +list1 array<string> from deserializer +map1 map<string,int> from deserializer +struct1 struct<sint:int,sboolean:boolean,sstring:string> from deserializer +union1 uniontype<float,boolean,string> from deserializer +enum1 string from deserializer +nullableint int from deserializer +bytes1 binary from deserializer +fixed1 binary from deserializer +dec1 decimal(5,2) from deserializer
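The final DESCRIBE doubles as a reference for the AvroSerDe's type mapping: long maps to bigint, bytes to binary, enum to string, fixed to binary, a ["type", "null"] union to the nullable base type, and a bytes field carrying a decimal logical type to decimal(precision,scale). A quick way to spot-check one such mapping outside the suite, with an illustrative table name:

-- Hypothetical table, for illustration only.
CREATE TABLE avro_decimal_check
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS
INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES ('avro.schema.literal'='{
  "namespace": "example", "name": "dec_check", "type": "record",
  "fields": [
    { "name":"dec1", "type":{"type":"bytes", "logicalType":"decimal", "precision":5, "scale":2} }
  ]
}');

DESCRIBE avro_decimal_check;   -- expect: dec1  decimal(5,2)  from deserializer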