diff --git data/files/avro_items data/files/avro_items new file mode 100644 index 0000000..e3b4183 --- /dev/null +++ data/files/avro_items @@ -0,0 +1,10 @@ +1|Toy Story (1995)|1995-01-01||http://us.imdb.com/M/title-exact?Toy%20Story%20(1995)|0|0|0|1|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0 +2|GoldenEye (1995)|1995-01-01||http://us.imdb.com/M/title-exact?GoldenEye%20(1995)|0|1|1|0|0|0|0|0|0|0|0|0|0|0|0|0|1|0|0 +3|Four Rooms (1995)|1995-01-01||http://us.imdb.com/M/title-exact?Four%20Rooms%20(1995)|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|0|1|0|0 +4|Get Shorty (1995)|1995-01-01||http://us.imdb.com/M/title-exact?Get%20Shorty%20(1995)|0|1|0|0|0|1|0|0|1|0|0|0|0|0|0|0|0|0|0 +5|Copycat (1995)|1995-01-01||http://us.imdb.com/M/title-exact?Copycat%20(1995)|0|0|0|0|0|0|1|0|1|0|0|0|0|0|0|0|1|0|0 +6|Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)|1995-01-01||http://us.imdb.com/Title?Yao+a+yao+yao+dao+waipo+qiao+(1995)|0|0|0|0|0|0|0|0|1|0|0|0|0|0|0|0|0|0|0 +7|Twelve Monkeys (1995)|1995-01-01||http://us.imdb.com/M/title-exact?Twelve%20Monkeys%20(1995)|0|0|0|0|0|0|0|0|1|0|0|0|0|0|0|1|0|0|0 +8|Babe (1995)|1995-01-01||http://us.imdb.com/M/title-exact?Babe%20(1995)|0|0|0|0|1|1|0|0|1|0|0|0|0|0|0|0|0|0|0 +9|Dead Man Walking (1995)|1995-01-01||http://us.imdb.com/M/title-exact?Dead%20Man%20Walking%20(1995)|0|0|0|0|0|0|0|0|1|0|0|0|0|0|0|0|0|0|0 +10|Richard III (1995)|1996-01-22||http://us.imdb.com/M/title-exact?Richard%20III%20(1995)|0|0|0|0|0|0|0|0|1|0|0|0|0|0|0|0|0|1|0 diff --git data/files/avro_items_schema.avsc data/files/avro_items_schema.avsc new file mode 100644 index 0000000..8ed30e5 --- /dev/null +++ data/files/avro_items_schema.avsc @@ -0,0 +1,117 @@ +{ + "type" : "record", + "name" : "ml_items", + "doc" : "Sqoop import of ml_items", + "fields" : [ { + "name" : "id", + "type" : [ "int", "null" ], + "columnName" : "id", + "sqlType" : "4" + }, { + "name" : "title", + "type" : [ "string", "null" ], + "columnName" : "title", + "sqlType" : "12" + }, { + "name" : "imdb_url", + "type" : [ "string", "null" ], + "columnName" : "imdb_url", + "sqlType" : "12" + }, { + "name" : "unknown_genre", + "type" : [ "int", "null" ], + "columnName" : "unknown_genre", + "sqlType" : "-6" + }, { + "name" : "action", + "type" : [ "int", "null" ], + "columnName" : "action", + "sqlType" : "-6" + }, { + "name" : "adventure", + "type" : [ "int", "null" ], + "columnName" : "adventure", + "sqlType" : "-6" + }, { + "name" : "animation", + "type" : [ "int", "null" ], + "columnName" : "animation", + "sqlType" : "-6" + }, { + "name" : "children", + "type" : [ "int", "null" ], + "columnName" : "children", + "sqlType" : "-6" + }, { + "name" : "comedy", + "type" : [ "int", "null" ], + "columnName" : "comedy", + "sqlType" : "-6" + }, { + "name" : "crime", + "type" : [ "int", "null" ], + "columnName" : "crime", + "sqlType" : "-6" + }, { + "name" : "documentary", + "type" : [ "int", "null" ], + "columnName" : "documentary", + "sqlType" : "-6" + }, { + "name" : "drama", + "type" : [ "int", "null" ], + "columnName" : "drama", + "sqlType" : "-6" + }, { + "name" : "fantasy", + "type" : [ "int", "null" ], + "columnName" : "fantasy", + "sqlType" : "-6" + }, { + "name" : "film_noir", + "type" : [ "int", "null" ], + "columnName" : "film_noir", + "sqlType" : "-6" + }, { + "name" : "horror", + "type" : [ "int", "null" ], + "columnName" : "horror", + "sqlType" : "-6" + }, { + "name" : "musical", + "type" : [ "int", "null" ], + "columnName" : "musical", + "sqlType" : "-6" + }, { + "name" : "mystery", + "type" : [ "int", "null" ], + "columnName" : "mystery", + "sqlType" : "-6" + }, { + "name" : "romance", + "type" : [ "int", "null" ], + "columnName" : "romance", + "sqlType" : "-6" + }, { + "name" : "sci_fi", + "type" : [ "int", "null" ], + "columnName" : "sci_fi", + "sqlType" : "-6" + }, { + "name" : "thriller", + "type" : [ "int", "null" ], + "columnName" : "thriller", + "sqlType" : "-6" + }, { + "name" : "war", + "type" : [ "int", "null" ], + "columnName" : "war", + "sqlType" : "-6" + }, { + "name" : "western", + "type" : [ "int", "null" ], + "columnName" : "western", + "sqlType" : "-6" + } ], + "tableName" : "ml_items" +} diff --git ql/src/test/queries/clientpositive/avro_backwardcomp.q ql/src/test/queries/clientpositive/avro_backwardcomp.q new file mode 100644 index 0000000..15e1e2a --- /dev/null +++ ql/src/test/queries/clientpositive/avro_backwardcomp.q @@ -0,0 +1,193 @@ +DROP TABLE IF EXISTS ml_items; + +CREATE TABLE ml_items(id INT, + title STRING, + release_date STRING, + video_release_date STRING, + imdb_url STRING, + unknown_genre TINYINT, + action TINYINT, + adventure TINYINT, + animation TINYINT, + children TINYINT, + comedy TINYINT, + crime TINYINT, + documentary TINYINT, + drama TINYINT, + fantasy TINYINT, + film_noir TINYINT, + horror TINYINT, + musical TINYINT, + mystery TINYINT, + romance TINYINT, + sci_fi TINYINT, + thriller TINYINT, + war TINYINT, + western TINYINT) + ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' + STORED AS TEXTFILE; +LOAD DATA LOCAL INPATH '../data/files/avro_items' INTO TABLE ml_items; + +select * from ml_items ORDER BY id ASC; + +DROP TABLE IF EXISTS ml_items_as_avro; +CREATE EXTERNAL TABLE ml_items_as_avro + ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + WITH SERDEPROPERTIES ( + 'schema.url'='file:${system:test.src.data.dir}/files/avro_items_schema.avsc') + STORED as INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + LOCATION 'file:${system:test.tmp.dir}/hive-ml-items'; + +describe ml_items_as_avro; + +INSERT OVERWRITE TABLE ml_items_as_avro + SELECT id, title, + imdb_url, unknown_genre, action, adventure, animation, children, comedy, crime, + documentary, drama, fantasy, film_noir, horror, musical, mystery, romance, + sci_fi, thriller, war, western + FROM ml_items; + +select * from ml_items_as_avro ORDER BY id ASC; + +DROP TABLE IF EXISTS ml_items_as_avro_2; +CREATE EXTERNAL TABLE ml_items_as_avro_2 + ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + WITH SERDEPROPERTIES ( + 'schema.literal'='{ + "type" : "record", + "name" : "ml_items", + "doc" : "Sqoop import of ml_items", + "fields" : [ { + "name" : "id", + "type" : [ "int", "null" ], + "columnName" : "id", + "sqlType" : "4" + }, { + "name" : "title", + "type" : [ "string", "null" ], + "columnName" : "title", + "sqlType" : "12" + }, { + "name" : "imdb_url", + "type" : [ "string", "null" ], + "columnName" : "imdb_url", + "sqlType" : "12" + }, { + "name" : "unknown_genre", + "type" : [ "int", "null" ], + "columnName" : "unknown_genre", + "sqlType" : "-6" + }, { + "name" : "action", + "type" : [ "int", "null" ], + "columnName" : "action", + "sqlType" : "-6" + }, { + "name" : "adventure", + "type" : [ "int", "null" ], + "columnName" : "adventure", + "sqlType" : "-6" + }, { + "name" : "animation", + "type" : [ "int", "null" ], + "columnName" : "animation", + "sqlType" : "-6" + }, { + "name" : "children", + "type" : [ "int", "null" ], + "columnName" : "children", + "sqlType" : "-6" + }, { + "name" : "comedy", + "type" : [ "int", "null" ], + "columnName" : "comedy", + "sqlType" : "-6" + }, { + "name" : "crime", + "type" : [ "int", "null" ], + "columnName" : "crime", + "sqlType" : "-6" + }, { + "name" : "documentary", + "type" : [ "int", "null" ], + "columnName" : "documentary", + "sqlType" : "-6" + }, { + "name" : "drama", + "type" : [ "int", "null" ], + "columnName" : "drama", + "sqlType" : "-6" + }, { + "name" : "fantasy", + "type" : [ "int", "null" ], + "columnName" : "fantasy", + "sqlType" : "-6" + }, { + "name" : "film_noir", + "type" : [ "int", "null" ], + "columnName" : "film_noir", + "sqlType" : "-6" + }, { + "name" : "horror", + "type" : [ "int", "null" ], + "columnName" : "horror", + "sqlType" : "-6" + }, { + "name" : "musical", + "type" : [ "int", "null" ], + "columnName" : "musical", + "sqlType" : "-6" + }, { + "name" : "mystery", + "type" : [ "int", "null" ], + "columnName" : "mystery", + "sqlType" : "-6" + }, { + "name" : "romance", + "type" : [ "int", "null" ], + "columnName" : "romance", + "sqlType" : "-6" + }, { + "name" : "sci_fi", + "type" : [ "int", "null" ], + "columnName" : "sci_fi", + "sqlType" : "-6" + }, { + "name" : "thriller", + "type" : [ "int", "null" ], + "columnName" : "thriller", + "sqlType" : "-6" + }, { + "name" : "war", + "type" : [ "int", "null" ], + "columnName" : "war", + "sqlType" : "-6" + }, { + "name" : "western", + "type" : [ "int", "null" ], + "columnName" : "western", + "sqlType" : "-6" + } ], + "tableName" : "ml_items" + }') + STORED as INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + LOCATION 'file:${system:test.tmp.dir}/hive-ml-items'; + +describe ml_items_as_avro_2; + +INSERT OVERWRITE TABLE ml_items_as_avro_2 + SELECT id, title, + imdb_url, unknown_genre, action, adventure, animation, children, comedy, crime, + documentary, drama, fantasy, film_noir, horror, musical, mystery, romance, + sci_fi, thriller, war, western + FROM ml_items; + +select * from ml_items_as_avro_2 ORDER BY id ASC; diff --git ql/src/test/results/clientpositive/avro_backwardcomp.q.out ql/src/test/results/clientpositive/avro_backwardcomp.q.out new file mode 100644 index 0000000..58d02f1 --- /dev/null +++ ql/src/test/results/clientpositive/avro_backwardcomp.q.out @@ -0,0 +1,691 @@ +PREHOOK: query: DROP TABLE IF EXISTS ml_items +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS ml_items +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE ml_items(id INT, + title STRING, + release_date STRING, + video_release_date STRING, + imdb_url STRING, + unknown_genre TINYINT, + action TINYINT, + adventure TINYINT, + animation TINYINT, + children TINYINT, + comedy TINYINT, + crime TINYINT, + documentary TINYINT, + drama TINYINT, + fantasy TINYINT, + film_noir TINYINT, + horror TINYINT, + musical TINYINT, + mystery TINYINT, + romance TINYINT, + sci_fi TINYINT, + thriller TINYINT, + war TINYINT, + western TINYINT) + ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' + STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE ml_items(id INT, + title STRING, + release_date STRING, + video_release_date STRING, + imdb_url STRING, + unknown_genre TINYINT, + action TINYINT, + adventure TINYINT, + animation TINYINT, + children TINYINT, + comedy TINYINT, + crime TINYINT, + documentary TINYINT, + drama TINYINT, + fantasy TINYINT, + film_noir TINYINT, + horror TINYINT, + musical TINYINT, + mystery TINYINT, + romance TINYINT, + sci_fi TINYINT, + thriller TINYINT, + war TINYINT, + western TINYINT) + ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' + STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@ml_items +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/avro_items' INTO TABLE ml_items +PREHOOK: type: LOAD +PREHOOK: Output: default@ml_items +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/avro_items' INTO TABLE ml_items +POSTHOOK: type: LOAD +POSTHOOK: Output: default@ml_items +PREHOOK: query: select * from ml_items ORDER BY id ASC +PREHOOK: type: QUERY +PREHOOK: Input: default@ml_items +#### A masked pattern was here #### +POSTHOOK: query: select * from ml_items ORDER BY id ASC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ml_items +#### A masked pattern was here #### +1 Toy Story (1995) 1995-01-01 http://us.imdb.com/M/title-exact?Toy%20Story%20(1995) 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 +2 GoldenEye (1995) 1995-01-01 http://us.imdb.com/M/title-exact?GoldenEye%20(1995) 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +3 Four Rooms (1995) 1995-01-01 http://us.imdb.com/M/title-exact?Four%20Rooms%20(1995) 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +4 Get Shorty (1995) 1995-01-01 http://us.imdb.com/M/title-exact?Get%20Shorty%20(1995) 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 +5 Copycat (1995) 1995-01-01 http://us.imdb.com/M/title-exact?Copycat%20(1995) 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 +6 Shanghai Triad (Yao a yao yao dao waipo qiao) (1995) 1995-01-01 http://us.imdb.com/Title?Yao+a+yao+yao+dao+waipo+qiao+(1995) 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 +7 Twelve Monkeys (1995) 1995-01-01 http://us.imdb.com/M/title-exact?Twelve%20Monkeys%20(1995) 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 +8 Babe (1995) 1995-01-01 http://us.imdb.com/M/title-exact?Babe%20(1995) 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 +9 Dead Man Walking (1995) 1995-01-01 http://us.imdb.com/M/title-exact?Dead%20Man%20Walking%20(1995) 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 +10 Richard III (1995) 1996-01-22 http://us.imdb.com/M/title-exact?Richard%20III%20(1995) 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 +PREHOOK: query: DROP TABLE IF EXISTS ml_items_as_avro +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS ml_items_as_avro +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE EXTERNAL TABLE ml_items_as_avro + ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + WITH SERDEPROPERTIES ( +#### A masked pattern was here #### + STORED as INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE EXTERNAL TABLE ml_items_as_avro + ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + WITH SERDEPROPERTIES ( +#### A masked pattern was here #### + STORED as INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@ml_items_as_avro +PREHOOK: query: describe ml_items_as_avro +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe ml_items_as_avro +POSTHOOK: type: DESCTABLE +id int from deserializer +title string from deserializer +imdb_url string from deserializer +unknown_genre int from deserializer +action int from deserializer +adventure int from deserializer +animation int from deserializer +children int from deserializer +comedy int from deserializer +crime int from deserializer +documentary int from deserializer +drama int from deserializer +fantasy int from deserializer +film_noir int from deserializer +horror int from deserializer +musical int from deserializer +mystery int from deserializer +romance int from deserializer +sci_fi int from deserializer +thriller int from deserializer +war int from deserializer +western int from deserializer +PREHOOK: query: INSERT OVERWRITE TABLE ml_items_as_avro + SELECT id, title, + imdb_url, unknown_genre, action, adventure, animation, children, comedy, crime, + documentary, drama, fantasy, film_noir, horror, musical, mystery, romance, + sci_fi, thriller, war, western + FROM ml_items +PREHOOK: type: QUERY +PREHOOK: Input: default@ml_items +PREHOOK: Output: default@ml_items_as_avro +POSTHOOK: query: INSERT OVERWRITE TABLE ml_items_as_avro + SELECT id, title, + imdb_url, unknown_genre, action, adventure, animation, children, comedy, crime, + documentary, drama, fantasy, film_noir, horror, musical, mystery, romance, + sci_fi, thriller, war, western + FROM ml_items +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ml_items +POSTHOOK: Output: default@ml_items_as_avro +POSTHOOK: Lineage: ml_items_as_avro.action EXPRESSION [(ml_items)ml_items.FieldSchema(name:action, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.adventure EXPRESSION [(ml_items)ml_items.FieldSchema(name:adventure, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.animation EXPRESSION [(ml_items)ml_items.FieldSchema(name:animation, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.children EXPRESSION [(ml_items)ml_items.FieldSchema(name:children, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.comedy EXPRESSION [(ml_items)ml_items.FieldSchema(name:comedy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.crime EXPRESSION [(ml_items)ml_items.FieldSchema(name:crime, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.documentary EXPRESSION [(ml_items)ml_items.FieldSchema(name:documentary, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.drama EXPRESSION [(ml_items)ml_items.FieldSchema(name:drama, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.fantasy EXPRESSION [(ml_items)ml_items.FieldSchema(name:fantasy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.film_noir EXPRESSION [(ml_items)ml_items.FieldSchema(name:film_noir, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.horror EXPRESSION [(ml_items)ml_items.FieldSchema(name:horror, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.id SIMPLE [(ml_items)ml_items.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.imdb_url SIMPLE [(ml_items)ml_items.FieldSchema(name:imdb_url, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.musical EXPRESSION [(ml_items)ml_items.FieldSchema(name:musical, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.mystery EXPRESSION [(ml_items)ml_items.FieldSchema(name:mystery, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.romance EXPRESSION [(ml_items)ml_items.FieldSchema(name:romance, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.sci_fi EXPRESSION [(ml_items)ml_items.FieldSchema(name:sci_fi, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.thriller EXPRESSION [(ml_items)ml_items.FieldSchema(name:thriller, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.title SIMPLE [(ml_items)ml_items.FieldSchema(name:title, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.unknown_genre EXPRESSION [(ml_items)ml_items.FieldSchema(name:unknown_genre, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.war EXPRESSION [(ml_items)ml_items.FieldSchema(name:war, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.western EXPRESSION [(ml_items)ml_items.FieldSchema(name:western, type:tinyint, comment:null), ] +PREHOOK: query: select * from ml_items_as_avro ORDER BY id ASC +PREHOOK: type: QUERY +PREHOOK: Input: default@ml_items_as_avro +#### A masked pattern was here #### +POSTHOOK: query: select * from ml_items_as_avro ORDER BY id ASC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ml_items_as_avro +#### A masked pattern was here #### +POSTHOOK: Lineage: ml_items_as_avro.action EXPRESSION [(ml_items)ml_items.FieldSchema(name:action, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.adventure EXPRESSION [(ml_items)ml_items.FieldSchema(name:adventure, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.animation EXPRESSION [(ml_items)ml_items.FieldSchema(name:animation, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.children EXPRESSION [(ml_items)ml_items.FieldSchema(name:children, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.comedy EXPRESSION [(ml_items)ml_items.FieldSchema(name:comedy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.crime EXPRESSION [(ml_items)ml_items.FieldSchema(name:crime, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.documentary EXPRESSION [(ml_items)ml_items.FieldSchema(name:documentary, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.drama EXPRESSION [(ml_items)ml_items.FieldSchema(name:drama, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.fantasy EXPRESSION [(ml_items)ml_items.FieldSchema(name:fantasy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.film_noir EXPRESSION [(ml_items)ml_items.FieldSchema(name:film_noir, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.horror EXPRESSION [(ml_items)ml_items.FieldSchema(name:horror, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.id SIMPLE [(ml_items)ml_items.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.imdb_url SIMPLE [(ml_items)ml_items.FieldSchema(name:imdb_url, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.musical EXPRESSION [(ml_items)ml_items.FieldSchema(name:musical, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.mystery EXPRESSION [(ml_items)ml_items.FieldSchema(name:mystery, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.romance EXPRESSION [(ml_items)ml_items.FieldSchema(name:romance, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.sci_fi EXPRESSION [(ml_items)ml_items.FieldSchema(name:sci_fi, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.thriller EXPRESSION [(ml_items)ml_items.FieldSchema(name:thriller, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.title SIMPLE [(ml_items)ml_items.FieldSchema(name:title, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.unknown_genre EXPRESSION [(ml_items)ml_items.FieldSchema(name:unknown_genre, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.war EXPRESSION [(ml_items)ml_items.FieldSchema(name:war, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.western EXPRESSION [(ml_items)ml_items.FieldSchema(name:western, type:tinyint, comment:null), ] +1 Toy Story (1995) http://us.imdb.com/M/title-exact?Toy%20Story%20(1995) 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 +2 GoldenEye (1995) http://us.imdb.com/M/title-exact?GoldenEye%20(1995) 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +3 Four Rooms (1995) http://us.imdb.com/M/title-exact?Four%20Rooms%20(1995) 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +4 Get Shorty (1995) http://us.imdb.com/M/title-exact?Get%20Shorty%20(1995) 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 +5 Copycat (1995) http://us.imdb.com/M/title-exact?Copycat%20(1995) 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 +6 Shanghai Triad (Yao a yao yao dao waipo qiao) (1995) http://us.imdb.com/Title?Yao+a+yao+yao+dao+waipo+qiao+(1995) 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 +7 Twelve Monkeys (1995) http://us.imdb.com/M/title-exact?Twelve%20Monkeys%20(1995) 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 +8 Babe (1995) http://us.imdb.com/M/title-exact?Babe%20(1995) 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 +9 Dead Man Walking (1995) http://us.imdb.com/M/title-exact?Dead%20Man%20Walking%20(1995) 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 +10 Richard III (1995) http://us.imdb.com/M/title-exact?Richard%20III%20(1995) 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 +PREHOOK: query: DROP TABLE IF EXISTS ml_items_as_avro_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS ml_items_as_avro_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Lineage: ml_items_as_avro.action EXPRESSION [(ml_items)ml_items.FieldSchema(name:action, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.adventure EXPRESSION [(ml_items)ml_items.FieldSchema(name:adventure, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.animation EXPRESSION [(ml_items)ml_items.FieldSchema(name:animation, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.children EXPRESSION [(ml_items)ml_items.FieldSchema(name:children, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.comedy EXPRESSION [(ml_items)ml_items.FieldSchema(name:comedy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.crime EXPRESSION [(ml_items)ml_items.FieldSchema(name:crime, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.documentary EXPRESSION [(ml_items)ml_items.FieldSchema(name:documentary, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.drama EXPRESSION [(ml_items)ml_items.FieldSchema(name:drama, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.fantasy EXPRESSION [(ml_items)ml_items.FieldSchema(name:fantasy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.film_noir EXPRESSION [(ml_items)ml_items.FieldSchema(name:film_noir, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.horror EXPRESSION [(ml_items)ml_items.FieldSchema(name:horror, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.id SIMPLE [(ml_items)ml_items.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.imdb_url SIMPLE [(ml_items)ml_items.FieldSchema(name:imdb_url, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.musical EXPRESSION [(ml_items)ml_items.FieldSchema(name:musical, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.mystery EXPRESSION [(ml_items)ml_items.FieldSchema(name:mystery, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.romance EXPRESSION [(ml_items)ml_items.FieldSchema(name:romance, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.sci_fi EXPRESSION [(ml_items)ml_items.FieldSchema(name:sci_fi, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.thriller EXPRESSION [(ml_items)ml_items.FieldSchema(name:thriller, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.title SIMPLE [(ml_items)ml_items.FieldSchema(name:title, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.unknown_genre EXPRESSION [(ml_items)ml_items.FieldSchema(name:unknown_genre, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.war EXPRESSION [(ml_items)ml_items.FieldSchema(name:war, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.western EXPRESSION [(ml_items)ml_items.FieldSchema(name:western, type:tinyint, comment:null), ] +PREHOOK: query: CREATE EXTERNAL TABLE ml_items_as_avro_2 + ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + WITH SERDEPROPERTIES ( + 'schema.literal'='{ + "type" : "record", + "name" : "ml_items", + "doc" : "Sqoop import of ml_items", + "fields" : [ { + "name" : "id", + "type" : [ "int", "null" ], + "columnName" : "id", + "sqlType" : "4" + }, { + "name" : "title", + "type" : [ "string", "null" ], + "columnName" : "title", + "sqlType" : "12" + }, { + "name" : "imdb_url", + "type" : [ "string", "null" ], + "columnName" : "imdb_url", + "sqlType" : "12" + }, { + "name" : "unknown_genre", + "type" : [ "int", "null" ], + "columnName" : "unknown_genre", + "sqlType" : "-6" + }, { + "name" : "action", + "type" : [ "int", "null" ], + "columnName" : "action", + "sqlType" : "-6" + }, { + "name" : "adventure", + "type" : [ "int", "null" ], + "columnName" : "adventure", + "sqlType" : "-6" + }, { + "name" : "animation", + "type" : [ "int", "null" ], + "columnName" : "animation", + "sqlType" : "-6" + }, { + "name" : "children", + "type" : [ "int", "null" ], + "columnName" : "children", + "sqlType" : "-6" + }, { + "name" : "comedy", + "type" : [ "int", "null" ], + "columnName" : "comedy", + "sqlType" : "-6" + }, { + "name" : "crime", + "type" : [ "int", "null" ], + "columnName" : "crime", + "sqlType" : "-6" + }, { + "name" : "documentary", + "type" : [ "int", "null" ], + "columnName" : "documentary", + "sqlType" : "-6" + }, { + "name" : "drama", + "type" : [ "int", "null" ], + "columnName" : "drama", + "sqlType" : "-6" + }, { + "name" : "fantasy", + "type" : [ "int", "null" ], + "columnName" : "fantasy", + "sqlType" : "-6" + }, { + "name" : "film_noir", + "type" : [ "int", "null" ], + "columnName" : "film_noir", + "sqlType" : "-6" + }, { + "name" : "horror", + "type" : [ "int", "null" ], + "columnName" : "horror", + "sqlType" : "-6" + }, { + "name" : "musical", + "type" : [ "int", "null" ], + "columnName" : "musical", + "sqlType" : "-6" + }, { + "name" : "mystery", + "type" : [ "int", "null" ], + "columnName" : "mystery", + "sqlType" : "-6" + }, { + "name" : "romance", + "type" : [ "int", "null" ], + "columnName" : "romance", + "sqlType" : "-6" + }, { + "name" : "sci_fi", + "type" : [ "int", "null" ], + "columnName" : "sci_fi", + "sqlType" : "-6" + }, { + "name" : "thriller", + "type" : [ "int", "null" ], + "columnName" : "thriller", + "sqlType" : "-6" + }, { + "name" : "war", + "type" : [ "int", "null" ], + "columnName" : "war", + "sqlType" : "-6" + }, { + "name" : "western", + "type" : [ "int", "null" ], + "columnName" : "western", + "sqlType" : "-6" + } ], + "tableName" : "ml_items" + }') + STORED as INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE EXTERNAL TABLE ml_items_as_avro_2 + ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + WITH SERDEPROPERTIES ( + 'schema.literal'='{ + "type" : "record", + "name" : "ml_items", + "doc" : "Sqoop import of ml_items", + "fields" : [ { + "name" : "id", + "type" : [ "int", "null" ], + "columnName" : "id", + "sqlType" : "4" + }, { + "name" : "title", + "type" : [ "string", "null" ], + "columnName" : "title", + "sqlType" : "12" + }, { + "name" : "imdb_url", + "type" : [ "string", "null" ], + "columnName" : "imdb_url", + "sqlType" : "12" + }, { + "name" : "unknown_genre", + "type" : [ "int", "null" ], + "columnName" : "unknown_genre", + "sqlType" : "-6" + }, { + "name" : "action", + "type" : [ "int", "null" ], + "columnName" : "action", + "sqlType" : "-6" + }, { + "name" : "adventure", + "type" : [ "int", "null" ], + "columnName" : "adventure", + "sqlType" : "-6" + }, { + "name" : "animation", + "type" : [ "int", "null" ], + "columnName" : "animation", + "sqlType" : "-6" + }, { + "name" : "children", + "type" : [ "int", "null" ], + "columnName" : "children", + "sqlType" : "-6" + }, { + "name" : "comedy", + "type" : [ "int", "null" ], + "columnName" : "comedy", + "sqlType" : "-6" + }, { + "name" : "crime", + "type" : [ "int", "null" ], + "columnName" : "crime", + "sqlType" : "-6" + }, { + "name" : "documentary", + "type" : [ "int", "null" ], + "columnName" : "documentary", + "sqlType" : "-6" + }, { + "name" : "drama", + "type" : [ "int", "null" ], + "columnName" : "drama", + "sqlType" : "-6" + }, { + "name" : "fantasy", + "type" : [ "int", "null" ], + "columnName" : "fantasy", + "sqlType" : "-6" + }, { + "name" : "film_noir", + "type" : [ "int", "null" ], + "columnName" : "film_noir", + "sqlType" : "-6" + }, { + "name" : "horror", + "type" : [ "int", "null" ], + "columnName" : "horror", + "sqlType" : "-6" + }, { + "name" : "musical", + "type" : [ "int", "null" ], + "columnName" : "musical", + "sqlType" : "-6" + }, { + "name" : "mystery", + "type" : [ "int", "null" ], + "columnName" : "mystery", + "sqlType" : "-6" + }, { + "name" : "romance", + "type" : [ "int", "null" ], + "columnName" : "romance", + "sqlType" : "-6" + }, { + "name" : "sci_fi", + "type" : [ "int", "null" ], + "columnName" : "sci_fi", + "sqlType" : "-6" + }, { + "name" : "thriller", + "type" : [ "int", "null" ], + "columnName" : "thriller", + "sqlType" : "-6" + }, { + "name" : "war", + "type" : [ "int", "null" ], + "columnName" : "war", + "sqlType" : "-6" + }, { + "name" : "western", + "type" : [ "int", "null" ], + "columnName" : "western", + "sqlType" : "-6" + } ], + "tableName" : "ml_items" + }') + STORED as INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@ml_items_as_avro_2 +POSTHOOK: Lineage: ml_items_as_avro.action EXPRESSION [(ml_items)ml_items.FieldSchema(name:action, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.adventure EXPRESSION [(ml_items)ml_items.FieldSchema(name:adventure, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.animation EXPRESSION [(ml_items)ml_items.FieldSchema(name:animation, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.children EXPRESSION [(ml_items)ml_items.FieldSchema(name:children, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.comedy EXPRESSION [(ml_items)ml_items.FieldSchema(name:comedy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.crime EXPRESSION [(ml_items)ml_items.FieldSchema(name:crime, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.documentary EXPRESSION [(ml_items)ml_items.FieldSchema(name:documentary, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.drama EXPRESSION [(ml_items)ml_items.FieldSchema(name:drama, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.fantasy EXPRESSION [(ml_items)ml_items.FieldSchema(name:fantasy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.film_noir EXPRESSION [(ml_items)ml_items.FieldSchema(name:film_noir, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.horror EXPRESSION [(ml_items)ml_items.FieldSchema(name:horror, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.id SIMPLE [(ml_items)ml_items.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.imdb_url SIMPLE [(ml_items)ml_items.FieldSchema(name:imdb_url, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.musical EXPRESSION [(ml_items)ml_items.FieldSchema(name:musical, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.mystery EXPRESSION [(ml_items)ml_items.FieldSchema(name:mystery, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.romance EXPRESSION [(ml_items)ml_items.FieldSchema(name:romance, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.sci_fi EXPRESSION [(ml_items)ml_items.FieldSchema(name:sci_fi, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.thriller EXPRESSION [(ml_items)ml_items.FieldSchema(name:thriller, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.title SIMPLE [(ml_items)ml_items.FieldSchema(name:title, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.unknown_genre EXPRESSION [(ml_items)ml_items.FieldSchema(name:unknown_genre, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.war EXPRESSION [(ml_items)ml_items.FieldSchema(name:war, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.western EXPRESSION [(ml_items)ml_items.FieldSchema(name:western, type:tinyint, comment:null), ] +PREHOOK: query: describe ml_items_as_avro_2 +PREHOOK: type: DESCTABLE +POSTHOOK: query: describe ml_items_as_avro_2 +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: ml_items_as_avro.action EXPRESSION [(ml_items)ml_items.FieldSchema(name:action, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.adventure EXPRESSION [(ml_items)ml_items.FieldSchema(name:adventure, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.animation EXPRESSION [(ml_items)ml_items.FieldSchema(name:animation, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.children EXPRESSION [(ml_items)ml_items.FieldSchema(name:children, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.comedy EXPRESSION [(ml_items)ml_items.FieldSchema(name:comedy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.crime EXPRESSION [(ml_items)ml_items.FieldSchema(name:crime, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.documentary EXPRESSION [(ml_items)ml_items.FieldSchema(name:documentary, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.drama EXPRESSION [(ml_items)ml_items.FieldSchema(name:drama, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.fantasy EXPRESSION [(ml_items)ml_items.FieldSchema(name:fantasy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.film_noir EXPRESSION [(ml_items)ml_items.FieldSchema(name:film_noir, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.horror EXPRESSION [(ml_items)ml_items.FieldSchema(name:horror, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.id SIMPLE [(ml_items)ml_items.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.imdb_url SIMPLE [(ml_items)ml_items.FieldSchema(name:imdb_url, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.musical EXPRESSION [(ml_items)ml_items.FieldSchema(name:musical, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.mystery EXPRESSION [(ml_items)ml_items.FieldSchema(name:mystery, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.romance EXPRESSION [(ml_items)ml_items.FieldSchema(name:romance, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.sci_fi EXPRESSION [(ml_items)ml_items.FieldSchema(name:sci_fi, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.thriller EXPRESSION [(ml_items)ml_items.FieldSchema(name:thriller, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.title SIMPLE [(ml_items)ml_items.FieldSchema(name:title, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.unknown_genre EXPRESSION [(ml_items)ml_items.FieldSchema(name:unknown_genre, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.war EXPRESSION [(ml_items)ml_items.FieldSchema(name:war, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.western EXPRESSION [(ml_items)ml_items.FieldSchema(name:western, type:tinyint, comment:null), ] +id int from deserializer +title string from deserializer +imdb_url string from deserializer +unknown_genre int from deserializer +action int from deserializer +adventure int from deserializer +animation int from deserializer +children int from deserializer +comedy int from deserializer +crime int from deserializer +documentary int from deserializer +drama int from deserializer +fantasy int from deserializer +film_noir int from deserializer +horror int from deserializer +musical int from deserializer +mystery int from deserializer +romance int from deserializer +sci_fi int from deserializer +thriller int from deserializer +war int from deserializer +western int from deserializer +PREHOOK: query: INSERT OVERWRITE TABLE ml_items_as_avro_2 + SELECT id, title, + imdb_url, unknown_genre, action, adventure, animation, children, comedy, crime, + documentary, drama, fantasy, film_noir, horror, musical, mystery, romance, + sci_fi, thriller, war, western + FROM ml_items +PREHOOK: type: QUERY +PREHOOK: Input: default@ml_items +PREHOOK: Output: default@ml_items_as_avro_2 +POSTHOOK: query: INSERT OVERWRITE TABLE ml_items_as_avro_2 + SELECT id, title, + imdb_url, unknown_genre, action, adventure, animation, children, comedy, crime, + documentary, drama, fantasy, film_noir, horror, musical, mystery, romance, + sci_fi, thriller, war, western + FROM ml_items +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ml_items +POSTHOOK: Output: default@ml_items_as_avro_2 +POSTHOOK: Lineage: ml_items_as_avro.action EXPRESSION [(ml_items)ml_items.FieldSchema(name:action, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.adventure EXPRESSION [(ml_items)ml_items.FieldSchema(name:adventure, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.animation EXPRESSION [(ml_items)ml_items.FieldSchema(name:animation, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.children EXPRESSION [(ml_items)ml_items.FieldSchema(name:children, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.comedy EXPRESSION [(ml_items)ml_items.FieldSchema(name:comedy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.crime EXPRESSION [(ml_items)ml_items.FieldSchema(name:crime, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.documentary EXPRESSION [(ml_items)ml_items.FieldSchema(name:documentary, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.drama EXPRESSION [(ml_items)ml_items.FieldSchema(name:drama, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.fantasy EXPRESSION [(ml_items)ml_items.FieldSchema(name:fantasy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.film_noir EXPRESSION [(ml_items)ml_items.FieldSchema(name:film_noir, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.horror EXPRESSION [(ml_items)ml_items.FieldSchema(name:horror, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.id SIMPLE [(ml_items)ml_items.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.imdb_url SIMPLE [(ml_items)ml_items.FieldSchema(name:imdb_url, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.musical EXPRESSION [(ml_items)ml_items.FieldSchema(name:musical, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.mystery EXPRESSION [(ml_items)ml_items.FieldSchema(name:mystery, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.romance EXPRESSION [(ml_items)ml_items.FieldSchema(name:romance, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.sci_fi EXPRESSION [(ml_items)ml_items.FieldSchema(name:sci_fi, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.thriller EXPRESSION [(ml_items)ml_items.FieldSchema(name:thriller, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.title SIMPLE [(ml_items)ml_items.FieldSchema(name:title, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.unknown_genre EXPRESSION [(ml_items)ml_items.FieldSchema(name:unknown_genre, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.war EXPRESSION [(ml_items)ml_items.FieldSchema(name:war, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.western EXPRESSION [(ml_items)ml_items.FieldSchema(name:western, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.action EXPRESSION [(ml_items)ml_items.FieldSchema(name:action, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.adventure EXPRESSION [(ml_items)ml_items.FieldSchema(name:adventure, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.animation EXPRESSION [(ml_items)ml_items.FieldSchema(name:animation, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.children EXPRESSION [(ml_items)ml_items.FieldSchema(name:children, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.comedy EXPRESSION [(ml_items)ml_items.FieldSchema(name:comedy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.crime EXPRESSION [(ml_items)ml_items.FieldSchema(name:crime, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.documentary EXPRESSION [(ml_items)ml_items.FieldSchema(name:documentary, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.drama EXPRESSION [(ml_items)ml_items.FieldSchema(name:drama, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.fantasy EXPRESSION [(ml_items)ml_items.FieldSchema(name:fantasy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.film_noir EXPRESSION [(ml_items)ml_items.FieldSchema(name:film_noir, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.horror EXPRESSION [(ml_items)ml_items.FieldSchema(name:horror, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.id SIMPLE [(ml_items)ml_items.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.imdb_url SIMPLE [(ml_items)ml_items.FieldSchema(name:imdb_url, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.musical EXPRESSION [(ml_items)ml_items.FieldSchema(name:musical, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.mystery EXPRESSION [(ml_items)ml_items.FieldSchema(name:mystery, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.romance EXPRESSION [(ml_items)ml_items.FieldSchema(name:romance, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.sci_fi EXPRESSION [(ml_items)ml_items.FieldSchema(name:sci_fi, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.thriller EXPRESSION [(ml_items)ml_items.FieldSchema(name:thriller, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.title SIMPLE [(ml_items)ml_items.FieldSchema(name:title, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.unknown_genre EXPRESSION [(ml_items)ml_items.FieldSchema(name:unknown_genre, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.war EXPRESSION [(ml_items)ml_items.FieldSchema(name:war, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.western EXPRESSION [(ml_items)ml_items.FieldSchema(name:western, type:tinyint, comment:null), ] +PREHOOK: query: select * from ml_items_as_avro_2 ORDER BY id ASC +PREHOOK: type: QUERY +PREHOOK: Input: default@ml_items_as_avro_2 +#### A masked pattern was here #### +POSTHOOK: query: select * from ml_items_as_avro_2 ORDER BY id ASC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ml_items_as_avro_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: ml_items_as_avro.action EXPRESSION [(ml_items)ml_items.FieldSchema(name:action, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.adventure EXPRESSION [(ml_items)ml_items.FieldSchema(name:adventure, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.animation EXPRESSION [(ml_items)ml_items.FieldSchema(name:animation, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.children EXPRESSION [(ml_items)ml_items.FieldSchema(name:children, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.comedy EXPRESSION [(ml_items)ml_items.FieldSchema(name:comedy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.crime EXPRESSION [(ml_items)ml_items.FieldSchema(name:crime, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.documentary EXPRESSION [(ml_items)ml_items.FieldSchema(name:documentary, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.drama EXPRESSION [(ml_items)ml_items.FieldSchema(name:drama, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.fantasy EXPRESSION [(ml_items)ml_items.FieldSchema(name:fantasy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.film_noir EXPRESSION [(ml_items)ml_items.FieldSchema(name:film_noir, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.horror EXPRESSION [(ml_items)ml_items.FieldSchema(name:horror, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.id SIMPLE [(ml_items)ml_items.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.imdb_url SIMPLE [(ml_items)ml_items.FieldSchema(name:imdb_url, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.musical EXPRESSION [(ml_items)ml_items.FieldSchema(name:musical, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.mystery EXPRESSION [(ml_items)ml_items.FieldSchema(name:mystery, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.romance EXPRESSION [(ml_items)ml_items.FieldSchema(name:romance, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.sci_fi EXPRESSION [(ml_items)ml_items.FieldSchema(name:sci_fi, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.thriller EXPRESSION [(ml_items)ml_items.FieldSchema(name:thriller, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.title SIMPLE [(ml_items)ml_items.FieldSchema(name:title, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.unknown_genre EXPRESSION [(ml_items)ml_items.FieldSchema(name:unknown_genre, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.war EXPRESSION [(ml_items)ml_items.FieldSchema(name:war, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro.western EXPRESSION [(ml_items)ml_items.FieldSchema(name:western, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.action EXPRESSION [(ml_items)ml_items.FieldSchema(name:action, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.adventure EXPRESSION [(ml_items)ml_items.FieldSchema(name:adventure, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.animation EXPRESSION [(ml_items)ml_items.FieldSchema(name:animation, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.children EXPRESSION [(ml_items)ml_items.FieldSchema(name:children, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.comedy EXPRESSION [(ml_items)ml_items.FieldSchema(name:comedy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.crime EXPRESSION [(ml_items)ml_items.FieldSchema(name:crime, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.documentary EXPRESSION [(ml_items)ml_items.FieldSchema(name:documentary, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.drama EXPRESSION [(ml_items)ml_items.FieldSchema(name:drama, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.fantasy EXPRESSION [(ml_items)ml_items.FieldSchema(name:fantasy, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.film_noir EXPRESSION [(ml_items)ml_items.FieldSchema(name:film_noir, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.horror EXPRESSION [(ml_items)ml_items.FieldSchema(name:horror, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.id SIMPLE [(ml_items)ml_items.FieldSchema(name:id, type:int, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.imdb_url SIMPLE [(ml_items)ml_items.FieldSchema(name:imdb_url, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.musical EXPRESSION [(ml_items)ml_items.FieldSchema(name:musical, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.mystery EXPRESSION [(ml_items)ml_items.FieldSchema(name:mystery, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.romance EXPRESSION [(ml_items)ml_items.FieldSchema(name:romance, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.sci_fi EXPRESSION [(ml_items)ml_items.FieldSchema(name:sci_fi, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.thriller EXPRESSION [(ml_items)ml_items.FieldSchema(name:thriller, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.title SIMPLE [(ml_items)ml_items.FieldSchema(name:title, type:string, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.unknown_genre EXPRESSION [(ml_items)ml_items.FieldSchema(name:unknown_genre, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.war EXPRESSION [(ml_items)ml_items.FieldSchema(name:war, type:tinyint, comment:null), ] +POSTHOOK: Lineage: ml_items_as_avro_2.western EXPRESSION [(ml_items)ml_items.FieldSchema(name:western, type:tinyint, comment:null), ] +1 Toy Story (1995) http://us.imdb.com/M/title-exact?Toy%20Story%20(1995) 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 +2 GoldenEye (1995) http://us.imdb.com/M/title-exact?GoldenEye%20(1995) 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +3 Four Rooms (1995) http://us.imdb.com/M/title-exact?Four%20Rooms%20(1995) 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 +4 Get Shorty (1995) http://us.imdb.com/M/title-exact?Get%20Shorty%20(1995) 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 +5 Copycat (1995) http://us.imdb.com/M/title-exact?Copycat%20(1995) 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 1 0 0 +6 Shanghai Triad (Yao a yao yao dao waipo qiao) (1995) http://us.imdb.com/Title?Yao+a+yao+yao+dao+waipo+qiao+(1995) 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 +7 Twelve Monkeys (1995) http://us.imdb.com/M/title-exact?Twelve%20Monkeys%20(1995) 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 +8 Babe (1995) http://us.imdb.com/M/title-exact?Babe%20(1995) 0 0 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 +9 Dead Man Walking (1995) http://us.imdb.com/M/title-exact?Dead%20Man%20Walking%20(1995) 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 +10 Richard III (1995) http://us.imdb.com/M/title-exact?Richard%20III%20(1995) 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java index 13848b6..768b1f0 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java @@ -47,6 +47,11 @@ public class AvroSerdeUtils { + SCHEMA_URL + " specified, can't determine table schema"; public static final String AVRO_SERDE_SCHEMA = "avro.serde.schema"; + // provide backward compatibility for AvroSerDe + // if avro.schema.literal is not found, take schema.literal + // if avro.schema.url is not found, take schema.url + public static final String SCHEMA_LITERAL_OLD = "schema.literal"; + public static final String SCHEMA_URL_OLD = "schema.url"; /** * Determine the schema to that's been provided for Avro serde work. * @param properties containing a key pointing to the schema, one way or another @@ -57,11 +62,17 @@ public class AvroSerdeUtils { public static Schema determineSchemaOrThrowException(Properties properties) throws IOException, AvroSerdeException { String schemaString = properties.getProperty(SCHEMA_LITERAL); + if (schemaString == null) { + schemaString = properties.getProperty(SCHEMA_LITERAL_OLD); + } if(schemaString != null && !schemaString.equals(SCHEMA_NONE)) return Schema.parse(schemaString); // Try pulling directly from URL schemaString = properties.getProperty(SCHEMA_URL); + if (schemaString == null) { + schemaString = properties.getProperty(SCHEMA_URL_OLD); + } if(schemaString == null || schemaString.equals(SCHEMA_NONE)) throw new AvroSerdeException(EXCEPTION_MESSAGE);