Index: eclipse-templates/.classpath
===================================================================
--- eclipse-templates/.classpath	(revision 1523883)
+++ eclipse-templates/.classpath	(working copy)
@@ -47,6 +47,8 @@
+
+
Index: ivy/libraries.properties
===================================================================
--- ivy/libraries.properties	(revision 1523883)
+++ ivy/libraries.properties	(working copy)
@@ -22,7 +22,7 @@
 ant-task.version=2.0.10
 antlr.version=3.4
 antlr-runtime.version=3.4
-avro.version=1.7.1
+avro.version=1.7.5
 datanucleus-api-jdo.version=3.2.1
 datanucleus-core.version=3.2.2
 datanucleus-rdbms.version=3.2.1
Index: serde/ivy.xml
===================================================================
--- serde/ivy.xml	(revision 1523883)
+++ serde/ivy.xml	(working copy)
@@ -40,6 +40,10 @@
             transitive="false"/>
+
+
Index: serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
===================================================================
--- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java	(revision 1523883)
+++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java	(working copy)
@@ -28,9 +28,12 @@
 import java.util.Hashtable;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 
 import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericData;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
@@ -500,4 +503,117 @@
       assertEquals(expected, soi.getPrimitiveJavaObject(rowElement));
     }
   }
+
+  @Test
+  public void canDeserializeColumnsSelectively() throws SerDeException, IOException {
+    String LOCALRECORD_SCHEMA = "{\n" +
+        "  \"namespace\": \"test.avro.deserializer\",\n" +
+        "  \"name\": \"oneRecord\",\n" +
+        "  \"type\": \"record\",\n" +
+        "  \"fields\": [\n" +
+        "    {\n" +
+        "      \"name\":\"string1\",\n" +
+        "      \"type\":\"string\"\n" +
+        "    },\n" +
+        "    {\n" +
+        "      \"name\":\"string2\",\n" +
+        "      \"type\":\"string\"\n" +
+        "    },\n" +
+        "    {\n" +
+        "      \"name\":\"int1\",\n" +
+        "      \"type\":\"int\"\n" +
+        "    },\n" +
+        "    {\n" +
+        "      \"name\":\"aRecord\",\n" +
+        "      \"type\":{\"type\":\"record\",\n" +
+        "               \"name\":\"recordWithinARecord\",\n" +
+        "               \"fields\": [\n" +
+        "                 {\n" +
+        "                   \"name\":\"int1\",\n" +
+        "                   \"type\":\"int\"\n" +
+        "                 },\n" +
+        "                 {\n" +
+        "                   \"name\":\"boolean1\",\n" +
+        "                   \"type\":\"boolean\"\n" +
+        "                 },\n" +
+        "                 {\n" +
+        "                   \"name\":\"long1\",\n" +
+        "                   \"type\":\"long\"\n" +
+        "                 }\n" +
+        "               ]}\n" +
+        "    }\n" +
+        "  ]\n" +
+        "}";
+
+    Schema s = Schema.parse(LOCALRECORD_SCHEMA);
+    GenericData.Record record = new GenericData.Record(s);
+
+    record.put("string1", "Wandered Aimlessly");
+    record.put("string2", "as a cloud");
+    record.put("int1", 555);
+
+    GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema());
+    innerRecord.put("int1", 42);
+    innerRecord.put("boolean1", true);
+    innerRecord.put("long1", 42432234234L);
+    record.put("aRecord", innerRecord);
+
+    assertTrue(GENERIC_DATA.validate(s, record));
+
+    AvroGenericRecordWritable garw = Utils.serializeAndDeserializeRecord(record);
+
+    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
+
+    AvroSerDe des = new AvroSerDe();
+    Configuration hconf = new Configuration();
+    Properties props = new Properties();
+    props.put(AvroSerdeUtils.SCHEMA_LITERAL, LOCALRECORD_SCHEMA);
+
+    // Ask for columns 1 and 3 only, deliberately unsorted and with duplicates,
+    // to exercise the sort/de-duplication in AvroSerDe.generateReadColumns().
+    ArrayList<Integer> queryCols = new ArrayList<Integer>();
+    queryCols.add(3);
+    queryCols.add(1);
+    queryCols.add(1);
+    queryCols.add(3);
+    queryCols.add(1);
+    ColumnProjectionUtils.appendReadColumnIDs(hconf, queryCols);
+
+    // Initialize deserializer
+    des.initialize(hconf, props);
+
+    ArrayList<Object> row = (ArrayList<Object>) des.deserialize(garw);
+    assertEquals(4, row.size());
+
+    Object stringObj = row.get(1);
+    assertEquals("as a cloud", stringObj);
+
+    Object theRecordObject = row.get(3);
+    System.out.println("theRecordObject = " + theRecordObject.getClass().getCanonicalName());
+
+    // Column 2 was not projected, so it comes back as a null hole.
+    Object theIntObject = row.get(2);
+    assertEquals(null, theIntObject);
+
+    // The original record was lost in the deserialization, so go the correct
+    // way: through the object inspectors.
+    StandardStructObjectInspector oi = (StandardStructObjectInspector) aoig.getObjectInspector();
+    List<? extends StructField> allStructFieldRefs = oi.getAllStructFieldRefs();
+    assertEquals(4, allStructFieldRefs.size());
+    StructField fieldRefForaRecord = allStructFieldRefs.get(3);
+    assertEquals("arecord", fieldRefForaRecord.getFieldName());
+    Object innerRecord2 = oi.getStructFieldData(row, fieldRefForaRecord);
+
+    // Extract innerRecord field refs
+    StandardStructObjectInspector innerRecord2OI =
+        (StandardStructObjectInspector) fieldRefForaRecord.getFieldObjectInspector();
+
+    List<? extends StructField> allStructFieldRefs1 = innerRecord2OI.getAllStructFieldRefs();
+    assertEquals(3, allStructFieldRefs1.size());
+    assertEquals("int1", allStructFieldRefs1.get(0).getFieldName());
+    assertEquals("boolean1", allStructFieldRefs1.get(1).getFieldName());
+    assertEquals("long1", allStructFieldRefs1.get(2).getFieldName());
+
+    assertEquals(42, innerRecord2OI.getStructFieldData(innerRecord2, allStructFieldRefs1.get(0)));
+    assertEquals(true, innerRecord2OI.getStructFieldData(innerRecord2, allStructFieldRefs1.get(1)));
+    assertEquals(42432234234L, innerRecord2OI.getStructFieldData(innerRecord2, allStructFieldRefs1.get(2)));
+  }
 }
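For reference, the null-padding walk that the AvroDeserializer change (next section) performs is easiest to see in isolation. A minimal standalone sketch, assuming the id list has already been sorted and de-duplicated; the names here are illustrative, not part of the patch:

    List<Integer> wanted = Arrays.asList(1, 3);        // sorted, unique column ids
    List<Object> row = new ArrayList<Object>();
    int projectedIdx = 0;
    for (int col = 0; col < 4; col++) {
      if (projectedIdx < wanted.size() && wanted.get(projectedIdx) == col) {
        projectedIdx++;
        row.add("col-" + col);                         // stands in for worker(...)
      } else {
        row.add(null);                                 // hole for an unread column
      }
    }
    // row is now [null, "col-1", null, "col-3"]: same arity as the schema, so
    // the ObjectInspectors downstream still line up by position.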
Index: serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java	(revision 1523883)
+++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java	(working copy)
@@ -96,7 +96,12 @@
   private List<Object> row;
   private SchemaReEncoder reEncoder;
 
+  private List<Integer> readColumnArray;
+  public void setReadColumnsArray(List<Integer> rColumnArray) {
+    this.readColumnArray = rColumnArray;
+  }
+
   /**
    * Deserialize an Avro record, recursing into its component fields and
    * deserializing them as well. Fields of the record are matched by name
@@ -137,21 +142,38 @@
       r = reEncoder.reencode(r, readerSchema);
     }
 
-    workerBase(row, columnNames, columnTypes, r);
+    workerBase(row, columnNames, columnTypes, r, true);
     return row;
   }
 
+  // Returns true iff column colId is needed by the query.
+  private boolean isNeededColumn(boolean project, int projectedIdx, int colId) {
+    if (!project || readColumnArray == null ||
+        (projectedIdx < readColumnArray.size() && readColumnArray.get(projectedIdx) == colId)) {
+      return true;
+    }
+    return false;
+  }
+
   // The actual deserialization may involve nested records, which require recursion.
-  private List<Object> workerBase(List<Object> objectRow, List<String> columnNames,
-                                  List<TypeInfo> columnTypes, GenericRecord record)
+  private List<Object> workerBase(List<Object> objectRow, List<String> columnNames,
+                                  List<TypeInfo> columnTypes, GenericRecord record,
+                                  boolean project)
       throws AvroSerdeException {
+    // Because readColumnArray is sorted and duplicate-free, we can walk it in
+    // step with the column ids and add nulls for the columns we skip.
+    int projectedIdx = 0;
     for(int i = 0; i < columnNames.size(); i++) {
-      TypeInfo columnType = columnTypes.get(i);
-      String columnName = columnNames.get(i);
-      Object datum = record.get(columnName);
-      Schema datumSchema = record.getSchema().getField(columnName).schema();
-
-      objectRow.add(worker(datum, datumSchema, columnType));
+      if (isNeededColumn(project, projectedIdx, i)) {
+        projectedIdx++;
+        TypeInfo columnType = columnTypes.get(i);
+        String columnName = columnNames.get(i);
+        Object datum = record.get(columnName);
+        Schema datumSchema = record.getSchema().getField(columnName).schema();
+        objectRow.add(worker(datum, datumSchema, columnType));
+      } else {
+        objectRow.add(null);
+      }
     }
 
     return objectRow;
@@ -230,7 +252,7 @@
     ArrayList<String> innerFieldNames = columnType.getAllStructFieldNames();
     List<Object> innerObjectRow = new ArrayList<Object>(innerFieldTypes.size());
 
-    return workerBase(innerObjectRow, innerFieldNames, innerFieldTypes, datum);
+    return workerBase(innerObjectRow, innerFieldNames, innerFieldTypes, datum, false);
   }
 
   private Object deserializeUnion(Object datum, Schema recordSchema,
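For context, this is how projected column ids reach the SerDe. A small sketch against ColumnProjectionUtils, with a bare Configuration standing in for the one the query planner populates:

    Configuration conf = new Configuration();
    ColumnProjectionUtils.appendReadColumnIDs(conf, Arrays.asList(3, 1, 1));
    List<Integer> ids = ColumnProjectionUtils.getReadColumnIDs(conf);
    // ids may come back unsorted and with duplicates (here [3, 1, 1]), which
    // is why generateReadColumns() in the next section sorts and de-duplicates
    // before the deserializer uses the ids positionally.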
Index: serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java	(revision 1523883)
+++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java	(working copy)
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.serde2.avro;
 
+import java.util.Collections;
 import java.util.List;
 import java.util.Properties;
 
@@ -24,6 +25,7 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeStats;
@@ -39,6 +41,7 @@
   private ObjectInspector oi;
   private List<String> columnNames;
   private List<TypeInfo> columnTypes;
+  private List<Integer> readColumns;
   private Schema schema;
   private AvroDeserializer avroDeserializer = null;
   private AvroSerializer avroSerializer = null;
@@ -70,6 +73,7 @@
     this.columnNames = aoig.getColumnNames();
     this.columnTypes = aoig.getColumnTypes();
     this.oi = aoig.getObjectInspector();
+    this.readColumns = generateReadColumns(configuration);
   }
 
@@ -107,6 +111,7 @@
   private AvroDeserializer getDeserializer() {
     if(avroDeserializer == null) {
       avroDeserializer = new AvroDeserializer();
+      avroDeserializer.setReadColumnsArray(readColumns);
     }
 
     return avroDeserializer;
@@ -119,4 +124,22 @@
     return avroSerializer;
   }
 
+
+  private List<Integer> generateReadColumns(Configuration configuration) {
+    List<Integer> columns = null;
+    if (configuration != null) {
+      columns = ColumnProjectionUtils.getReadColumnIDs(configuration);
+      // For "SELECT *" type queries, the column array comes in empty. Handle
+      // this the same way as when the optimization is disabled.
+      if (columns.size() == 0) {
+        return null;
+      }
+    } else {
+      return null;
+    }
+
+    // Sort and eliminate duplicates: the deserializer walks this list in step
+    // with the column ids, so it must be ordered and duplicate-free.
+    Collections.sort(columns);
+    for (int i = columns.size() - 1; i > 0; i--) {
+      if (columns.get(i).equals(columns.get(i - 1))) {
+        columns.remove(i);
+      }
+    }
+    return columns;
+  }
 }
Index: ql/src/test/results/clientpositive/avro_column_sanity_test.q.out
===================================================================
--- ql/src/test/results/clientpositive/avro_column_sanity_test.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/avro_column_sanity_test.q.out	(revision 0)
@@ -0,0 +1,291 @@ +PREHOOK: query: -- verify that we can actually read avro-trevni files +CREATE TABLE futurama_episodes +ROW FORMAT SERDE +'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS INPUTFORMAT +'org.apache.hadoop.hive.ql.io.avro.AvroColumnInputFormat' +OUTPUTFORMAT +'org.apache.hadoop.hive.ql.io.avro.AvroColumnOutputFormat' +TBLPROPERTIES ( + 'avro.schema.literal'= + '{"namespace":"testing.hive.avro.columnar", + "name":"futurama_episode", + "type":"record", + "fields":[{"name":"id", "type":"int"}, + {"name":"season", "type":"int"}, + {"name":"episode", "type":"int"}, + {"name":"title", "type":"string"}]}') +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- verify that we can actually read avro-trevni files +CREATE TABLE futurama_episodes +ROW FORMAT SERDE +'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS INPUTFORMAT +'org.apache.hadoop.hive.ql.io.avro.AvroColumnInputFormat' +OUTPUTFORMAT +'org.apache.hadoop.hive.ql.io.avro.AvroColumnOutputFormat' +TBLPROPERTIES ( + 'avro.schema.literal'= + '{"namespace":"testing.hive.avro.columnar", + "name":"futurama_episode", + "type":"record", + "fields":[{"name":"id", "type":"int"}, + {"name":"season", "type":"int"}, + {"name":"episode", "type":"int"}, + {"name":"title", "type":"string"}]}') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@futurama_episodes +PREHOOK: query: DESCRIBE futurama_episodes +PREHOOK: type: DESCTABLE +POSTHOOK: query: DESCRIBE futurama_episodes +POSTHOOK: type: DESCTABLE +id int from deserializer +season int from deserializer +episode int from deserializer +title string from deserializer +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/futurama_episodes.avro' INTO TABLE futurama_episodes +PREHOOK: type: LOAD +PREHOOK: Output: default@futurama_episodes +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/futurama_episodes.avro' INTO TABLE futurama_episodes +POSTHOOK: type: LOAD +POSTHOOK: Output: default@futurama_episodes +PREHOOK: query: SELECT * FROM futurama_episodes ORDER BY id +PREHOOK: type: QUERY +PREHOOK: Input: default@futurama_episodes +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM futurama_episodes ORDER BY id +POSTHOOK: type: QUERY +POSTHOOK: Input: default@futurama_episodes +#### A masked pattern was here #### +1 1 1 Space Pilot 3000 +2 1 2 The Series Has Landed +3 1 3 I, Roommate +4 1 4 Love's Labors Lost in Space +5 1 5 Fear of a Bot Planet +6 1 6 A Fishful of Dollars +7 1 7 My Three Suns +8 1 8 A Big Piece of Garbage +9 1 9 Hell Is Other Robots +10 2 1 A Flight to Remember +11 2 2 Mars University +12 2 3 When Aliens Attack +13 2 4 Fry & the Slurm Factory +14 2 5 I Second That Emotion +15 2 6 Brannigan Begin Again +16 2 7 A Head in the Polls +17 2 8 Xmas Story +18 2 9 Why Must I Be a Crustacean in Love +20 2 10 Put Your Head on My Shoulder +20 2 11 Lesser of Two Evils +21 2 12 Raging Bender +22 2 13 A Bicyclops Built for Two +23 2 14 How Hermes Requisitioned His Groove Back +24 2 15 A Clone of My Own +25 2 16 The Deep South +26 2 17 Bender Gets Made +27 2 18 The Problem with Popplers +28 2 
19 Mother's Day +29 2 20 Anthology of Interest (1) +30 3 1 The Honking +31 3 2 War Is the H-Word +32 3 3 The Cryonic Woman +33 3 4 Parasites Lost +34 3 5 Amazon Women in the Mood +35 3 6 Bendless Love +36 3 7 The Day the Earth Stood Stupid +37 3 8 That's Lobstertainment! +38 3 9 The Birdbot of Ice-Catraz +39 3 10 Luck of the Fryrish +40 3 11 The Cyber House Rules +41 3 12 Insane in the Mainframe +42 3 13 Bendin' in the Wind +43 3 14 Time Keeps on Slipping +44 3 15 I Dated a Robot +45 4 1 Roswell That Ends Well +46 4 2 A Tale of Two Santas +47 4 3 Anthology of Interest (2) +48 4 4 Love and Rocket +49 4 5 Leela's Homeworld +50 4 6 Where the Buggalo Roam +51 4 7 A Pharaoh to Remember +52 4 8 Godfellas +53 4 9 Futurestock +54 4 10 A Leela of Her Own +55 4 11 30% Iron Chef +56 4 12 Where No Fan Has Gone Before +57 5 1 Crimes of the Hot +58 5 2 Jurassic Bark +59 5 3 The Route of All Evil +60 5 4 A Taste of Freedom +61 5 5 Kif Gets Knocked Up a Notch +62 5 6 Less Than Hero +63 5 7 Teenage Mutant Leela's Hurdles +64 5 8 The Why of Fry +65 5 9 The Sting +66 5 10 The Farnsworth Parabox +67 5 11 Three Hundred Big Boys +68 5 12 Spanish Fry +69 5 13 Bend Her +70 5 14 Obsoletely Fabulous +71 5 15 Bender Should Not Be Allowed on Television +72 5 16 The Devil's Hands Are Idle Playthings +73 6 1 Rebirth +74 6 2 In-A-Gadda-Da-Leela +75 6 3 Attack of the Killer App +76 6 4 Proposition Infinity +77 6 5 The Duh-Vinci Code +78 6 6 Lethal Inspection +79 6 7 The Late Philip J. Fry +80 6 8 That Darn Katz! +81 6 9 A Clockwork Origin +82 6 10 The Prisoner of Benda +83 6 11 Lrrreconcilable Ndndifferences +84 6 12 The Mutants Are Revolting +85 6 13 The Futurama Holiday Spectacular +86 6 14 Neutopia +87 6 15 Benderama +88 6 16 Ghost in the Machines +89 6 17 Law and Oracle +90 6 18 The Silence of the Clamps +91 6 19 Yo Leela Leela +92 6 20 All the Presidents' Heads +93 6 21 Möbius Dick +94 6 22 Fry am the Egg Man +95 6 23 The Tip of the Zoidberg +96 6 24 Cold Warriors +97 6 25 Overclockwise +98 6 26 Reincarnation +99 7 1 The Bots and the Bees +100 7 2 A Farewell to Arms +101 7 3 Decision 3012 +102 7 4 The Thief of Baghead +103 7 5 Zapp Dingbat +104 7 6 The Butterjunk Effect +105 7 7 The Six Million Dollar Mon +106 7 8 Fun on a Bun +107 7 9 Free Will Hunting +108 7 10 Near-Death Wish +109 7 11 Viva Mars Vegas +110 7 12 31st Century Fox +111 7 13 Naturama +PREHOOK: query: -- Want a MR job +SELECT distinct(title) FROM futurama_episodes +PREHOOK: type: QUERY +PREHOOK: Input: default@futurama_episodes +#### A masked pattern was here #### +POSTHOOK: query: -- Want a MR job +SELECT distinct(title) FROM futurama_episodes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@futurama_episodes +#### A masked pattern was here #### +30% Iron Chef +31st Century Fox +A Bicyclops Built for Two +A Big Piece of Garbage +A Clockwork Origin +A Clone of My Own +A Farewell to Arms +A Fishful of Dollars +A Flight to Remember +A Head in the Polls +A Leela of Her Own +A Pharaoh to Remember +A Tale of Two Santas +A Taste of Freedom +All the Presidents' Heads +Amazon Women in the Mood +Anthology of Interest (1) +Anthology of Interest (2) +Attack of the Killer App +Bend Her +Bender Gets Made +Bender Should Not Be Allowed on Television +Benderama +Bendin' in the Wind +Bendless Love +Brannigan Begin Again +Cold Warriors +Crimes of the Hot +Decision 3012 +Fear of a Bot Planet +Free Will Hunting +Fry & the Slurm Factory +Fry am the Egg Man +Fun on a Bun +Futurestock +Ghost in the Machines +Godfellas +Hell Is Other Robots +How Hermes Requisitioned His 
Groove Back +I Dated a Robot +I Second That Emotion +I, Roommate +In-A-Gadda-Da-Leela +Insane in the Mainframe +Jurassic Bark +Kif Gets Knocked Up a Notch +Law and Oracle +Leela's Homeworld +Less Than Hero +Lesser of Two Evils +Lethal Inspection +Love and Rocket +Love's Labors Lost in Space +Lrrreconcilable Ndndifferences +Luck of the Fryrish +Mars University +Mother's Day +My Three Suns +Möbius Dick +Naturama +Near-Death Wish +Neutopia +Obsoletely Fabulous +Overclockwise +Parasites Lost +Proposition Infinity +Put Your Head on My Shoulder +Raging Bender +Rebirth +Reincarnation +Roswell That Ends Well +Space Pilot 3000 +Spanish Fry +Teenage Mutant Leela's Hurdles +That Darn Katz! +That's Lobstertainment! +The Birdbot of Ice-Catraz +The Bots and the Bees +The Butterjunk Effect +The Cryonic Woman +The Cyber House Rules +The Day the Earth Stood Stupid +The Deep South +The Devil's Hands Are Idle Playthings +The Duh-Vinci Code +The Farnsworth Parabox +The Futurama Holiday Spectacular +The Honking +The Late Philip J. Fry +The Mutants Are Revolting +The Prisoner of Benda +The Problem with Popplers +The Route of All Evil +The Series Has Landed +The Silence of the Clamps +The Six Million Dollar Mon +The Sting +The Thief of Baghead +The Tip of the Zoidberg +The Why of Fry +Three Hundred Big Boys +Time Keeps on Slipping +Viva Mars Vegas +War Is the H-Word +When Aliens Attack +Where No Fan Has Gone Before +Where the Buggalo Roam +Why Must I Be a Crustacean in Love +Xmas Story +Yo Leela Leela +Zapp Dingbat
Index: ql/src/test/queries/clientpositive/avro_partition_format.q
===================================================================
--- ql/src/test/queries/clientpositive/avro_partition_format.q	(revision 0)
+++ ql/src/test/queries/clientpositive/avro_partition_format.q	(revision 0)
@@ -0,0 +1,51 @@
+--Ensure that we can change the file format across partitions and retain access to all data
+CREATE TABLE futurama
+ROW FORMAT SERDE
+'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS INPUTFORMAT
+'org.apache.hadoop.hive.ql.io.avro.AvroColumnInputFormat'
+OUTPUTFORMAT
+'org.apache.hadoop.hive.ql.io.avro.AvroColumnOutputFormat'
+TBLPROPERTIES (
+  'avro.schema.literal'=
+  '{"namespace":"testing.hive.avro.columnar",
+  "name":"futurama_episode",
+  "type":"record",
+  "fields":[{"name":"id", "type":"int"},
+  {"name":"season", "type":"int"},
+  {"name":"episode", "type":"int"},
+  {"name":"title", "type":"string"}]}');
+
+LOAD DATA LOCAL INPATH '../data/files/futurama_episodes.avro' INTO TABLE futurama;
+
+CREATE TABLE futurama_partitioned
+  PARTITIONED BY (season INT)
+  ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+  STORED AS INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
+  OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
+  TBLPROPERTIES (
+    'avro.schema.literal'=
+    '{"name":"futurama",
+    "type":"record",
+    "fields":[{"name":"id", "type":"int"},
+    {"name":"episode", "type":"int"},
+    {"name":"title", "type":"string"}]}');
+
+SET hive.exec.dynamic.partition.mode=nonstrict;
+INSERT OVERWRITE TABLE futurama_partitioned PARTITION (season) SELECT id, episode, title, season FROM futurama where season <= 4;
+
+ALTER TABLE futurama_partitioned SET FILEFORMAT
+  INPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroColumnInputFormat'
+  OUTPUTFORMAT
+  'org.apache.hadoop.hive.ql.io.avro.AvroColumnOutputFormat';
+
+INSERT OVERWRITE TABLE futurama_partitioned PARTITION (season) SELECT id, episode, title, season FROM futurama where season > 4;
+
+SELECT * FROM futurama_partitioned;
+
+-- Want an MR job
+SELECT distinct(title) AS unique_titles FROM futurama_partitioned;
Index: ql/src/test/queries/clientpositive/avro_column_sanity_test.q
===================================================================
--- ql/src/test/queries/clientpositive/avro_column_sanity_test.q	(revision 0)
+++ ql/src/test/queries/clientpositive/avro_column_sanity_test.q	(revision 0)
@@ -0,0 +1,26 @@
+-- verify that we can actually read avro-trevni files
+CREATE TABLE futurama_episodes
+ROW FORMAT SERDE
+'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
+STORED AS INPUTFORMAT
+'org.apache.hadoop.hive.ql.io.avro.AvroColumnInputFormat'
+OUTPUTFORMAT
+'org.apache.hadoop.hive.ql.io.avro.AvroColumnOutputFormat'
+TBLPROPERTIES (
+  'avro.schema.literal'=
+  '{"namespace":"testing.hive.avro.columnar",
+  "name":"futurama_episode",
+  "type":"record",
+  "fields":[{"name":"id", "type":"int"},
+  {"name":"season", "type":"int"},
+  {"name":"episode", "type":"int"},
+  {"name":"title", "type":"string"}]}');
+
+DESCRIBE futurama_episodes;
+
+LOAD DATA LOCAL INPATH '../data/files/futurama_episodes.avro' INTO TABLE futurama_episodes;
+
+SELECT * FROM futurama_episodes ORDER BY id;
+
+-- Want a MR job
+SELECT distinct(title) FROM futurama_episodes;
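The write path that AvroColumnOutputFormat (next section) wraps is the plain Trevni API from avro 1.7.5's trevni-avro module. A minimal sketch of that API outside Hive; the schema string and file name are placeholders:

    Schema schema = new Schema.Parser().parse(schemaJson);
    ColumnFileMetaData meta = new ColumnFileMetaData();
    meta.setCodec("deflate");
    AvroColumnWriter<GenericRecord> writer = new AvroColumnWriter<GenericRecord>(schema, meta);
    writer.write(record);                        // rows are buffered in memory...
    OutputStream out = new FileOutputStream("episodes.trv");
    writer.writeTo(out);                         // ...and only hit the stream here
    out.close();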
Index: ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroColumnOutputFormat.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroColumnOutputFormat.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroColumnOutputFormat.java	(revision 0)
@@ -0,0 +1,90 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.avro;
+
+import static org.apache.avro.file.DataFileConstants.DEFLATE_CODEC;
+import static org.apache.avro.mapred.AvroJob.OUTPUT_CODEC;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Properties;
+
+import org.apache.avro.Schema;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
+import org.apache.hadoop.hive.serde2.avro.AvroGenericRecordWritable;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeException;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Progressable;
+import org.apache.trevni.ColumnFileMetaData;
+import org.apache.trevni.avro.AvroColumnWriter;
+
+/**
+ * Write to a columnar Avro (Trevni) file from a Hive process.
+ */
+public class AvroColumnOutputFormat
+    implements HiveOutputFormat<LongWritable, AvroGenericRecordWritable> {
+
+  @Override
+  public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf,
+      Path path, Class<? extends Writable> valueClass, boolean isCompressed,
+      Properties properties, Progressable progressable) throws IOException {
+    Schema schema;
+    try {
+      schema = AvroSerdeUtils.determineSchemaOrThrowException(properties);
+    } catch (AvroSerdeException e) {
+      throw new IOException(e);
+    }
+
+    ColumnFileMetaData meta = new ColumnFileMetaData();
+    if (isCompressed) {
+      meta.setCodec(jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC));
+    }
+
+    AvroColumnWriter<GenericRecord> acw = new AvroColumnWriter<GenericRecord>(schema, meta);
+    OutputStream out = path.getFileSystem(jobConf).create(path);
+
+    return new AvroColumnRecordWriter(acw, out);
+  }
+
+  //no records will be emitted from Hive
+  public RecordWriter<LongWritable, AvroGenericRecordWritable>
+      getRecordWriter(FileSystem ignored, JobConf job, String name,
+          Progressable progress) {
+    return new RecordWriter<LongWritable, AvroGenericRecordWritable>() {
+      public void write(LongWritable key, AvroGenericRecordWritable value) {
+        throw new RuntimeException("Should not be called");
+      }
+
+      public void close(Reporter reporter) {
+      }
+    };
+  }
+
+  public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
+    return; // Not doing any check
+  }
+}
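getHiveRecordWriter() above only consults AvroJob.OUTPUT_CODEC ("avro.output.codec") when Hive hands it isCompressed == true, falling back to deflate. Assuming a Trevni build with snappy available, selecting the codec would look like this (a sketch, not part of the patch):

    JobConf job = new JobConf();
    job.set(AvroJob.OUTPUT_CODEC, "snappy");   // read via jobConf.get(OUTPUT_CODEC, DEFLATE_CODEC)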
Index: ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroColumnRecordWriter.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroColumnRecordWriter.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroColumnRecordWriter.java	(revision 0)
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.avro;
+
+
+import java.io.IOException;
+import java.io.OutputStream;
+
+import org.apache.avro.generic.GenericRecord;
+import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
+import org.apache.hadoop.hive.serde2.avro.AvroGenericRecordWritable;
+import org.apache.hadoop.io.Writable;
+import org.apache.trevni.avro.AvroColumnWriter;
+
+/**
+ * Write Avro GenericRecords to a Trevni (columnar) data file.
+ */
+public class AvroColumnRecordWriter implements FileSinkOperator.RecordWriter {
+  final private AvroColumnWriter<GenericRecord> acw;
+  final private OutputStream out;
+
+  public AvroColumnRecordWriter(AvroColumnWriter<GenericRecord> acw, OutputStream out) throws IOException {
+    this.acw = acw;
+    this.out = out;
+  }
+
+  @Override
+  public void write(Writable writable) throws IOException {
+    if(!(writable instanceof AvroGenericRecordWritable)) {
+      throw new IOException("Expecting instance of AvroGenericRecordWritable, " +
+          "but received " + writable.getClass().getCanonicalName());
+    }
+    AvroGenericRecordWritable r = (AvroGenericRecordWritable)writable;
+    acw.write(r.getRecord());
+  }
+
+
+  @Override
+  public void close(boolean abort) throws IOException {
+    acw.writeTo(out);
+    out.close();
+  }
+
+}
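Note that AvroColumnRecordWriter holds every written row in the AvroColumnWriter's memory until close(), when writeTo() finally serializes to the stream. The matching read path, which the record readers below build on, looks like this outside Hive (file name illustrative):

    AvroColumnReader.Params params = new AvroColumnReader.Params(new File("episodes.trv"));
    params.setSchema(readerSchema);              // optional projection schema
    AvroColumnReader<GenericRecord> reader = new AvroColumnReader<GenericRecord>(params);
    long rows = reader.getRowCount();
    while (reader.hasNext()) {
      GenericRecord r = reader.next();
      // ...
    }
    reader.close();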
Index: ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroRecordReaderBase.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroRecordReaderBase.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroRecordReaderBase.java	(revision 0)
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.avro;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.avro.Schema;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.plan.MapWork;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+import org.apache.hadoop.hive.serde2.avro.AvroGenericRecordWritable;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeException;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+import org.apache.hadoop.mapred.RecordReader;
+
+abstract class AvroRecordReaderBase implements
+    RecordReader<NullWritable, AvroGenericRecordWritable>, JobConfigurable {
+  protected static final Log LOG = LogFactory.getLog(AvroRecordReaderBase.class);
+
+  public abstract float getProgress() throws IOException;
+
+  public abstract long getPos() throws IOException;
+
+  public abstract boolean next(NullWritable nullWritable, AvroGenericRecordWritable record) throws IOException;
+
+  protected JobConf jobConf;
+
+  /**
+   * Attempt to retrieve the reader schema. We have a couple opportunities
+   * to provide this, depending on whether or not we're just selecting data
+   * or running with an MR job.
+   * @return Reader schema for the Avro object, or null if it has not been provided.
+   * @throws AvroSerdeException
+   */
+  protected Schema getSchema(JobConf job, FileSplit split) throws AvroSerdeException, IOException {
+    FileSystem fs = split.getPath().getFileSystem(job);
+    // Inside of a MR job, we can pull out the actual properties
+    if(AvroSerdeUtils.insideMRJob(job)) {
+      MapWork mapWork = Utilities.getMapWork(job);
+
+      // Iterate over the Path -> Partition descriptions to find the partition
+      // that matches our input split.
+      for (Map.Entry<String, PartitionDesc> pathsAndParts : mapWork.getPathToPartitionInfo().entrySet()) {
+        String partitionPath = pathsAndParts.getKey();
+        if(pathIsInPartition(split.getPath(), partitionPath)) {
+          if(LOG.isInfoEnabled()) {
+            LOG.info("Matching partition " + partitionPath +
+                " with input split " + split);
+          }
+
+          Properties props = pathsAndParts.getValue().getProperties();
+          if(props.containsKey(AvroSerdeUtils.SCHEMA_LITERAL) || props.containsKey(AvroSerdeUtils.SCHEMA_URL)) {
+            return AvroSerdeUtils.determineSchemaOrThrowException(props);
+          }
+          else {
+            return null; // If it's not in this property, it won't be in any others
+          }
+        }
+      }
+      if(LOG.isInfoEnabled()) {
+        LOG.info("Unable to match filesplit " + split + " with a partition.");
+      }
+    }
+
+    // In "select * from table" situations (non-MR), we can add things to the job
+    // It's safe to add this to the job since it's not *actually* a mapred job.
+    // Here the global state is confined to just this process.
+    String s = job.get(AvroSerdeUtils.AVRO_SERDE_SCHEMA);
+    if(s != null) {
+      LOG.info("Found the avro schema in the job: " + s);
+      return Schema.parse(s);
+    }
+    // No more places to get the schema from. Give up. May have to re-encode later.
+    return null;
+  }
+
+  private boolean pathIsInPartition(Path split, String partitionPath) {
+    boolean schemeless = split.toUri().getScheme() == null;
+    if (schemeless) {
+      String schemelessPartitionPath = new Path(partitionPath).toUri().getPath();
+      return split.toString().startsWith(schemelessPartitionPath);
+    } else {
+      return split.toString().startsWith(partitionPath);
+    }
+  }
+
+  @Override
+  public NullWritable createKey() {
+    return NullWritable.get();
+  }
+
+  @Override
+  public AvroGenericRecordWritable createValue() {
+    return new AvroGenericRecordWritable();
+  }
+
+  @Override
+  public abstract void close() throws IOException;
+
+  @Override
+  public void configure(JobConf jobConf) {
+    this.jobConf = jobConf;
+  }
+
+}
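pathIsInPartition() above is a plain prefix test, with the scheme stripped from the partition path when the split path has none. For example, with these hypothetical values:

    Path split = new Path("/warehouse/futurama_partitioned/season=5/000000_0");
    String partitionPath = "hdfs://nn:8020/warehouse/futurama_partitioned/season=5";
    // split has no scheme, so the partition path is reduced to
    // "/warehouse/futurama_partitioned/season=5" and the startsWith() test passes.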
Index: ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java	(revision 1523883)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java	(working copy)
@@ -17,10 +17,7 @@
  */
 package org.apache.hadoop.hive.ql.io.avro;
 
-
 import java.io.IOException;
-import java.util.Map;
-import java.util.Properties;
 
 import org.apache.avro.Schema;
 import org.apache.avro.file.DataFileReader;
@@ -30,33 +27,23 @@
 import org.apache.avro.mapred.FsInput;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.plan.MapWork;
-import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.serde2.avro.AvroGenericRecordWritable;
 import org.apache.hadoop.hive.serde2.avro.AvroSerdeException;
-import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.JobConfigurable;
-import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
 
 /**
  * RecordReader optimized against Avro GenericRecords that returns to record
  * as the value of the k-v pair, as Hive requires.
  */
-public class AvroGenericRecordReader implements
-    RecordReader<NullWritable, AvroGenericRecordWritable>, JobConfigurable {
+public class AvroGenericRecordReader extends AvroRecordReaderBase {
   private static final Log LOG = LogFactory.getLog(AvroGenericRecordReader.class);
-  final private org.apache.avro.file.FileReader<GenericRecord> reader;
   final private long start;
   final private long stop;
-  protected JobConf jobConf;
+  final private DataFileReader<GenericRecord> reader;
 
   public AvroGenericRecordReader(JobConf job, FileSplit split, Reporter reporter) throws IOException {
     this.jobConf = job;
@@ -80,66 +67,6 @@
     this.stop = split.getStart() + split.getLength();
   }
 
-  /**
-   * Attempt to retrieve the reader schema. We have a couple opportunities
-   * to provide this, depending on whether or not we're just selecting data
-   * or running with a MR job.
-   * @return Reader schema for the Avro object, or null if it has not been provided.
-   * @throws AvroSerdeException
-   */
-  private Schema getSchema(JobConf job, FileSplit split) throws AvroSerdeException, IOException {
-    FileSystem fs = split.getPath().getFileSystem(job);
-    // Inside of a MR job, we can pull out the actual properties
-    if(AvroSerdeUtils.insideMRJob(job)) {
-      MapWork mapWork = Utilities.getMapWork(job);
-
-      // Iterate over the Path -> Partition descriptions to find the partition
-      // that matches our input split.
-      for (Map.Entry<String, PartitionDesc> pathsAndParts: mapWork.getPathToPartitionInfo().entrySet()){
-        String partitionPath = pathsAndParts.getKey();
-        if(pathIsInPartition(split.getPath(), partitionPath)) {
-          if(LOG.isInfoEnabled()) {
-            LOG.info("Matching partition " + partitionPath +
-                " with input split " + split);
-          }
-
-          Properties props = pathsAndParts.getValue().getProperties();
-          if(props.containsKey(AvroSerdeUtils.SCHEMA_LITERAL) || props.containsKey(AvroSerdeUtils.SCHEMA_URL)) {
-            return AvroSerdeUtils.determineSchemaOrThrowException(props);
-          }
-          else {
-            return null; // If it's not in this property, it won't be in any others
-          }
-        }
-      }
-      if(LOG.isInfoEnabled()) {
-        LOG.info("Unable to match filesplit " + split + " with a partition.");
-      }
-    }
-
-    // In "select * from table" situations (non-MR), we can add things to the job
-    // It's safe to add this to the job since it's not *actually* a mapred job.
-    // Here the global state is confined to just this process.
-    String s = job.get(AvroSerdeUtils.AVRO_SERDE_SCHEMA);
-    if(s != null) {
-      LOG.info("Found the avro schema in the job: " + s);
-      return Schema.parse(s);
-    }
-    // No more places to get the schema from. Give up. May have to re-encode later.
-    return null;
-  }
-
-  private boolean pathIsInPartition(Path split, String partitionPath) {
-    boolean schemeless = split.toUri().getScheme() == null;
-    if (schemeless) {
-      String schemelessPartitionPath = new Path(partitionPath).toUri().getPath();
-      return split.toString().startsWith(schemelessPartitionPath);
-    } else {
-      return split.toString().startsWith(partitionPath);
-    }
-  }
-
-  @Override
   public boolean next(NullWritable nullWritable, AvroGenericRecordWritable record) throws IOException {
     if(!reader.hasNext() || reader.pastSync(stop)) {
       return false;
@@ -153,33 +80,18 @@
   }
 
   @Override
-  public NullWritable createKey() {
-    return NullWritable.get();
-  }
-
-  @Override
-  public AvroGenericRecordWritable createValue() {
-    return new AvroGenericRecordWritable();
-  }
-
-  @Override
   public long getPos() throws IOException {
     return reader.tell();
   }
 
   @Override
-  public void close() throws IOException {
-    reader.close();
-  }
-
-  @Override
   public float getProgress() throws IOException {
     return stop == start ? 0.0f
         : Math.min(1.0f, (getPos() - start) / (float)(stop - start));
   }
 
   @Override
-  public void configure(JobConf jobConf) {
-    this.jobConf= jobConf;
+  public void close() throws IOException {
+    reader.close();
   }
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroColumnRecordReader.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroColumnRecordReader.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroColumnRecordReader.java	(revision 0)
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.avro;
+
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import org.apache.avro.Schema;
+import org.apache.avro.Schema.Field;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.hive.serde2.avro.AvroGenericRecordWritable;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeException;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.trevni.avro.AvroColumnReader;
+import org.apache.trevni.avro.HadoopInput;
+
+/**
+ * RecordReader for Trevni (columnar) formatted Avro data. Reads only the
+ * columns needed, leaving all other fields null.
+ */
+public class AvroColumnRecordReader extends AvroRecordReaderBase {
+  protected static final Log LOG = LogFactory.getLog(AvroColumnRecordReader.class);
+
+  Schema readSchema;
+  Schema projectSchema;
+  private long count = 0;
+  final private long total_rows;
+  final private AvroColumnReader<GenericRecord> reader;
+  private final GenericRecord cache;
+  public AvroColumnRecordReader(JobConf job, FileSplit split, Reporter reporter) throws IOException {
+    this.jobConf = job;
+    Schema latest;
+
+    try {
+      latest = getSchema(job, split);
+    } catch (AvroSerdeException e) {
+      throw new IOException(e);
+    }
+
+    readSchema = latest;
+    cache = new GenericData.Record(readSchema);
+
+    // The Trevni spec requires that 1 file = 1 block, so we'll read the whole
+    // file. It's up to the user to conform to this requirement.
+    HadoopInput input = new HadoopInput(split.getPath(), job);
+    AvroColumnReader.Params params = new AvroColumnReader.Params(input);
+    projectSchema = getProjectedSchema(job, latest);
+    params.setSchema(projectSchema);
+    reader = new AvroColumnReader<GenericRecord>(params);
+    total_rows = reader.getRowCount();
+  }
+
+  // Project an Avro schema based on columnIds. We're only able to project the
+  // top-level fields.
+  private Schema getProjectedSchema(Configuration conf, Schema baseSchema) {
+    List<Integer> readColumns = ColumnProjectionUtils.getReadColumnIDs(conf);
+    if (readColumns.isEmpty()) {
+      return baseSchema;
+    }
+    // The id list may be unsorted and contain duplicates; a sorted set keeps
+    // the projected fields unique and in base-schema order.
+    SortedSet<Integer> uniqueColumns = new TreeSet<Integer>(readColumns);
+    List<Field> fields = baseSchema.getFields();
+    List<Field> readFields = new ArrayList<Field>();
+    for (Integer columnId : uniqueColumns) {
+      readFields.add(clone(fields.get(columnId)));
+    }
+    Schema projected = Schema.createRecord(baseSchema.getName(), null, baseSchema.getNamespace(), false);
+    projected.setFields(readFields);
+    return projected;
+  }
+
+  Field clone(Field field) {
+    Field cloned = new Field(field.name(), field.schema(), field.doc(), field.defaultValue());
+    return cloned;
+  }
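+  // Example: with the futurama_episode schema from the new q-tests and read
+  // column ids {1, 3}, getProjectedSchema() yields a record containing only
+  // the "season" and "title" fields, so Trevni touches only those two column
+  // stores; next() below then copies just these fields into the cached record.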
+  @Override
+  public boolean next(NullWritable nullWritable, AvroGenericRecordWritable record) throws IOException {
+    if(!reader.hasNext()) {
+      return false;
+    }
+
+    // The RecordReader should return a record with the full reader schema, so
+    // we copy over just the projected fields and leave the rest null.
+    GenericData.Record r = (GenericData.Record) reader.next();
+    count++;
+    for (Schema.Field field : projectSchema.getFields()) {
+      String name = field.name();
+      cache.put(name, r.get(name));
+    }
+    record.setRecord(cache);
+    return true;
+  }
+
+  @Override
+  public long getPos() throws IOException {
+    // This isn't canonical, but a row count makes the most sense for columnar storage.
+    return count;
+  }
+
+  @Override
+  public float getProgress() throws IOException {
+    return ((float) count) / total_rows;
+  }
+
+  @Override
+  public void close() throws IOException {
+    reader.close();
+  }
+}
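Trevni expects one reader per file (see the constructor comment in AvroColumnRecordReader), but AvroColumnInputFormat below inherits FileInputFormat's default splitting and relies on files staying within one block. If that turns out to be too fragile, a guard along these lines (a sketch, not in the patch) would force one split per file:

    @Override
    protected boolean isSplitable(FileSystem fs, Path filename) {
      return false;   // a Trevni file must be read in its entirety by one reader
    }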
Index: ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroColumnInputFormat.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroColumnInputFormat.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroColumnInputFormat.java	(revision 0)
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.avro;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.hive.serde2.avro.AvroGenericRecordWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobConfigurable;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapred.Reporter;
+
+public class AvroColumnInputFormat
+    extends FileInputFormat<NullWritable, AvroGenericRecordWritable> implements JobConfigurable {
+  protected JobConf jobConf;
+
+  @Override
+  protected FileStatus[] listStatus(JobConf job) throws IOException {
+    List<FileStatus> result = new ArrayList<FileStatus>();
+    for (FileStatus file : super.listStatus(job)) {
+      result.add(file);
+    }
+    return result.toArray(new FileStatus[0]);
+  }
+
+  @Override
+  public RecordReader<NullWritable, AvroGenericRecordWritable>
+      getRecordReader(InputSplit inputSplit, JobConf jc, Reporter reporter) throws IOException {
+    return new AvroColumnRecordReader(jc, (FileSplit) inputSplit, reporter);
+  }
+
+  @Override
+  public void configure(JobConf jobConf) {
+    this.jobConf = jobConf;
+  }
+}
Index: ql/build.xml
===================================================================
--- ql/build.xml	(revision 1523883)
+++ ql/build.xml	(working copy)
@@ -275,6 +275,18 @@ + + + + + + + + + + + + + +
Index: ql/src/test/results/clientpositive/avro_partition_format.q.out
===================================================================
--- ql/src/test/results/clientpositive/avro_partition_format.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/avro_partition_format.q.out	(revision 0)
@@ -0,0 +1,468 @@ +PREHOOK: query: --Ensure that we can change the file format across partitions and retain access to all data +CREATE TABLE futurama +ROW FORMAT SERDE +'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS INPUTFORMAT +'org.apache.hadoop.hive.ql.io.avro.AvroColumnInputFormat' +OUTPUTFORMAT +'org.apache.hadoop.hive.ql.io.avro.AvroColumnOutputFormat' +TBLPROPERTIES ( + 'avro.schema.literal'= + '{"namespace":"testing.hive.avro.columnar", + "name":"futurama_episode", + "type":"record", + "fields":[{"name":"id", "type":"int"}, + {"name":"season", "type":"int"}, + {"name":"episode", "type":"int"}, + {"name":"title", "type":"string"}]}') +PREHOOK: type: CREATETABLE +POSTHOOK: query: --Ensure that we can change the file format across partitions and retain access to all data +CREATE TABLE futurama +ROW FORMAT SERDE +'org.apache.hadoop.hive.serde2.avro.AvroSerDe' +STORED AS INPUTFORMAT +'org.apache.hadoop.hive.ql.io.avro.AvroColumnInputFormat' +OUTPUTFORMAT +'org.apache.hadoop.hive.ql.io.avro.AvroColumnOutputFormat' +TBLPROPERTIES ( + 'avro.schema.literal'= + '{"namespace":"testing.hive.avro.columnar", + "name":"futurama_episode", + "type":"record", + "fields":[{"name":"id", "type":"int"}, + {"name":"season", "type":"int"}, + {"name":"episode", "type":"int"}, + {"name":"title", "type":"string"}]}') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@futurama +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/futurama_episodes.avro' INTO TABLE futurama +PREHOOK: type: LOAD +PREHOOK: Output: default@futurama +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/futurama_episodes.avro' INTO TABLE futurama +POSTHOOK: type: LOAD +POSTHOOK: Output: default@futurama +PREHOOK: query: CREATE TABLE futurama_partitioned + PARTITIONED BY 
(season INT) + ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + TBLPROPERTIES ( + 'avro.schema.literal'= + '{"name":"futurama", + "type":"record", + "fields":[{"name":"id", "type":"int"}, + {"name":"episode", "type":"int"}, + {"name":"title", "type":"string"}]}') +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE futurama_partitioned + PARTITIONED BY (season INT) + ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' + STORED AS INPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' + OUTPUTFORMAT + 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' + TBLPROPERTIES ( + 'avro.schema.literal'= + '{"name":"futurama", + "type":"record", + "fields":[{"name":"id", "type":"int"}, + {"name":"episode", "type":"int"}, + {"name":"title", "type":"string"}]}') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@futurama_partitioned +PREHOOK: query: INSERT OVERWRITE TABLE futurama_partitioned PARTITION (season) SELECT id, episode, title, season FROM futurama where season <= 4 +PREHOOK: type: QUERY +PREHOOK: Input: default@futurama +PREHOOK: Output: default@futurama_partitioned +POSTHOOK: query: INSERT OVERWRITE TABLE futurama_partitioned PARTITION (season) SELECT id, episode, title, season FROM futurama where season <= 4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@futurama +POSTHOOK: Output: default@futurama_partitioned@season=1 +POSTHOOK: Output: default@futurama_partitioned@season=2 +POSTHOOK: Output: default@futurama_partitioned@season=3 +POSTHOOK: Output: default@futurama_partitioned@season=4 +POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: futurama_partitioned PARTITION(season=4).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: futurama_partitioned PARTITION(season=4).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: futurama_partitioned 
PARTITION(season=4).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+PREHOOK: query: ALTER TABLE futurama_partitioned SET FILEFORMAT
+ INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroColumnInputFormat'
+ OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroColumnOutputFormat'
+PREHOOK: type: ALTERTABLE_FILEFORMAT
+PREHOOK: Input: default@futurama_partitioned
+PREHOOK: Output: default@futurama_partitioned
+POSTHOOK: query: ALTER TABLE futurama_partitioned SET FILEFORMAT
+ INPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroColumnInputFormat'
+ OUTPUTFORMAT
+ 'org.apache.hadoop.hive.ql.io.avro.AvroColumnOutputFormat'
+POSTHOOK: type: ALTERTABLE_FILEFORMAT
+POSTHOOK: Input: default@futurama_partitioned
+POSTHOOK: Output: default@futurama_partitioned
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=4).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=4).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=4).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+PREHOOK: query: INSERT OVERWRITE TABLE futurama_partitioned PARTITION (season) SELECT id, episode, title, season FROM futurama where season > 4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@futurama
+PREHOOK: Output: default@futurama_partitioned
+POSTHOOK: query: INSERT OVERWRITE TABLE futurama_partitioned PARTITION (season) SELECT id, episode, title, season FROM futurama where season > 4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@futurama
+POSTHOOK: Output: default@futurama_partitioned@season=5
+POSTHOOK: Output: default@futurama_partitioned@season=6
+POSTHOOK: Output: default@futurama_partitioned@season=7
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=4).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=4).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=4).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=5).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=5).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=5).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=6).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=6).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=6).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=7).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=7).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=7).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+PREHOOK: query: SELECT * FROM futurama_partitioned
+PREHOOK: type: QUERY
+PREHOOK: Input: default@futurama_partitioned
+PREHOOK: Input: default@futurama_partitioned@season=1
+PREHOOK: Input: default@futurama_partitioned@season=2
+PREHOOK: Input: default@futurama_partitioned@season=3
+PREHOOK: Input: default@futurama_partitioned@season=4
+PREHOOK: Input: default@futurama_partitioned@season=5
+PREHOOK: Input: default@futurama_partitioned@season=6
+PREHOOK: Input: default@futurama_partitioned@season=7
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM futurama_partitioned
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@futurama_partitioned
+POSTHOOK: Input: default@futurama_partitioned@season=1
+POSTHOOK: Input: default@futurama_partitioned@season=2
+POSTHOOK: Input: default@futurama_partitioned@season=3
+POSTHOOK: Input: default@futurama_partitioned@season=4
+POSTHOOK: Input: default@futurama_partitioned@season=5
+POSTHOOK: Input: default@futurama_partitioned@season=6
+POSTHOOK: Input: default@futurama_partitioned@season=7
+#### A masked pattern was here ####
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=4).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=4).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=4).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=5).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=5).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=5).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=6).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=6).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=6).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=7).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=7).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=7).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+1 1 Space Pilot 3000 1
+2 2 The Series Has Landed 1
+3 3 I, Roommate 1
+4 4 Love's Labors Lost in Space 1
+5 5 Fear of a Bot Planet 1
+6 6 A Fishful of Dollars 1
+7 7 My Three Suns 1
+8 8 A Big Piece of Garbage 1
+9 9 Hell Is Other Robots 1
+10 1 A Flight to Remember 2
+11 2 Mars University 2
+12 3 When Aliens Attack 2
+13 4 Fry & the Slurm Factory 2
+14 5 I Second That Emotion 2
+15 6 Brannigan Begin Again 2
+16 7 A Head in the Polls 2
+17 8 Xmas Story 2
+18 9 Why Must I Be a Crustacean in Love 2
+20 10 Put Your Head on My Shoulder 2
+20 11 Lesser of Two Evils 2
+21 12 Raging Bender 2
+22 13 A Bicyclops Built for Two 2
+23 14 How Hermes Requisitioned His Groove Back 2
+24 15 A Clone of My Own 2
+25 16 The Deep South 2
+26 17 Bender Gets Made 2
+27 18 The Problem with Popplers 2
+28 19 Mother's Day 2
+29 20 Anthology of Interest (1) 2
+30 1 The Honking 3
+31 2 War Is the H-Word 3
+32 3 The Cryonic Woman 3
+33 4 Parasites Lost 3
+34 5 Amazon Women in the Mood 3
+35 6 Bendless Love 3
+36 7 The Day the Earth Stood Stupid 3
+37 8 That's Lobstertainment! 3
+38 9 The Birdbot of Ice-Catraz 3
+39 10 Luck of the Fryrish 3
+40 11 The Cyber House Rules 3
+41 12 Insane in the Mainframe 3
+42 13 Bendin' in the Wind 3
+43 14 Time Keeps on Slipping 3
+44 15 I Dated a Robot 3
+45 1 Roswell That Ends Well 4
+46 2 A Tale of Two Santas 4
+47 3 Anthology of Interest (2) 4
+48 4 Love and Rocket 4
+49 5 Leela's Homeworld 4
+50 6 Where the Buggalo Roam 4
+51 7 A Pharaoh to Remember 4
+52 8 Godfellas 4
+53 9 Futurestock 4
+54 10 A Leela of Her Own 4
+55 11 30% Iron Chef 4
+56 12 Where No Fan Has Gone Before 4
+57 1 Crimes of the Hot 5
+58 2 Jurassic Bark 5
+59 3 The Route of All Evil 5
+60 4 A Taste of Freedom 5
+61 5 Kif Gets Knocked Up a Notch 5
+62 6 Less Than Hero 5
+63 7 Teenage Mutant Leela's Hurdles 5
+64 8 The Why of Fry 5
+65 9 The Sting 5
+66 10 The Farnsworth Parabox 5
+67 11 Three Hundred Big Boys 5
+68 12 Spanish Fry 5
+69 13 Bend Her 5
+70 14 Obsoletely Fabulous 5
+71 15 Bender Should Not Be Allowed on Television 5
+72 16 The Devil's Hands Are Idle Playthings 5
+73 1 Rebirth 6
+74 2 In-A-Gadda-Da-Leela 6
+75 3 Attack of the Killer App 6
+76 4 Proposition Infinity 6
+77 5 The Duh-Vinci Code 6
+78 6 Lethal Inspection 6
+79 7 The Late Philip J. Fry 6
+80 8 That Darn Katz! 6
+81 9 A Clockwork Origin 6
+82 10 The Prisoner of Benda 6
+83 11 Lrrreconcilable Ndndifferences 6
+84 12 The Mutants Are Revolting 6
+85 13 The Futurama Holiday Spectacular 6
+86 14 Neutopia 6
+87 15 Benderama 6
+88 16 Ghost in the Machines 6
+89 17 Law and Oracle 6
+90 18 The Silence of the Clamps 6
+91 19 Yo Leela Leela 6
+92 20 All the Presidents' Heads 6
+93 21 Möbius Dick 6
+94 22 Fry am the Egg Man 6
+95 23 The Tip of the Zoidberg 6
+96 24 Cold Warriors 6
+97 25 Overclockwise 6
+98 26 Reincarnation 6
+99 1 The Bots and the Bees 7
+100 2 A Farewell to Arms 7
+101 3 Decision 3012 7
+102 4 The Thief of Baghead 7
+103 5 Zapp Dingbat 7
+104 6 The Butterjunk Effect 7
+105 7 The Six Million Dollar Mon 7
+106 8 Fun on a Bun 7
+107 9 Free Will Hunting 7
+108 10 Near-Death Wish 7
+109 11 Viva Mars Vegas 7
+110 12 31st Century Fox 7
+111 13 Naturama 7
+PREHOOK: query: -- Want an MR job
+SELECT distinct(title) AS unique_titles FROM futurama_partitioned
+PREHOOK: type: QUERY
+PREHOOK: Input: default@futurama_partitioned
+PREHOOK: Input: default@futurama_partitioned@season=1
+PREHOOK: Input: default@futurama_partitioned@season=2
+PREHOOK: Input: default@futurama_partitioned@season=3
+PREHOOK: Input: default@futurama_partitioned@season=4
+PREHOOK: Input: default@futurama_partitioned@season=5
+PREHOOK: Input: default@futurama_partitioned@season=6
+PREHOOK: Input: default@futurama_partitioned@season=7
+#### A masked pattern was here ####
+POSTHOOK: query: -- Want an MR job
+SELECT distinct(title) AS unique_titles FROM futurama_partitioned
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@futurama_partitioned
+POSTHOOK: Input: default@futurama_partitioned@season=1
+POSTHOOK: Input: default@futurama_partitioned@season=2
+POSTHOOK: Input: default@futurama_partitioned@season=3
+POSTHOOK: Input: default@futurama_partitioned@season=4
+POSTHOOK: Input: default@futurama_partitioned@season=5
+POSTHOOK: Input: default@futurama_partitioned@season=6
+POSTHOOK: Input: default@futurama_partitioned@season=7
+#### A masked pattern was here ####
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=1).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=2).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=3).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=4).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=4).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=4).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=5).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=5).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=5).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=6).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=6).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=6).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=7).episode SIMPLE [(futurama)futurama.FieldSchema(name:episode, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=7).id SIMPLE [(futurama)futurama.FieldSchema(name:id, type:int, comment:from deserializer), ]
+POSTHOOK: Lineage: futurama_partitioned PARTITION(season=7).title SIMPLE [(futurama)futurama.FieldSchema(name:title, type:string, comment:from deserializer), ]
+30% Iron Chef
+31st Century Fox
+A Bicyclops Built for Two
+A Big Piece of Garbage
+A Clockwork Origin
+A Clone of My Own
+A Farewell to Arms
+A Fishful of Dollars
+A Flight to Remember
+A Head in the Polls
+A Leela of Her Own
+A Pharaoh to Remember
+A Tale of Two Santas
+A Taste of Freedom
+All the Presidents' Heads
+Amazon Women in the Mood
+Anthology of Interest (1)
+Anthology of Interest (2)
+Attack of the Killer App
+Bend Her
+Bender Gets Made
+Bender Should Not Be Allowed on Television
+Benderama
+Bendin' in the Wind
+Bendless Love
+Brannigan Begin Again
+Cold Warriors
+Crimes of the Hot
+Decision 3012
+Fear of a Bot Planet
+Free Will Hunting
+Fry & the Slurm Factory
+Fry am the Egg Man
+Fun on a Bun
+Futurestock
+Ghost in the Machines
+Godfellas
+Hell Is Other Robots
+How Hermes Requisitioned His Groove Back
+I Dated a Robot
+I Second That Emotion
+I, Roommate
+In-A-Gadda-Da-Leela
+Insane in the Mainframe
+Jurassic Bark
+Kif Gets Knocked Up a Notch
+Law and Oracle
+Leela's Homeworld
+Less Than Hero
+Lesser of Two Evils
+Lethal Inspection
+Love and Rocket
+Love's Labors Lost in Space
+Lrrreconcilable Ndndifferences
+Luck of the Fryrish
+Mars University
+Mother's Day
+My Three Suns
+Möbius Dick
+Naturama
+Near-Death Wish
+Neutopia
+Obsoletely Fabulous
+Overclockwise
+Parasites Lost
+Proposition Infinity
+Put Your Head on My Shoulder
+Raging Bender
+Rebirth
+Reincarnation
+Roswell That Ends Well
+Space Pilot 3000
+Spanish Fry
+Teenage Mutant Leela's Hurdles
+That Darn Katz!
+That's Lobstertainment!
+The Birdbot of Ice-Catraz
+The Bots and the Bees
+The Butterjunk Effect
+The Cryonic Woman
+The Cyber House Rules
+The Day the Earth Stood Stupid
+The Deep South
+The Devil's Hands Are Idle Playthings
+The Duh-Vinci Code
+The Farnsworth Parabox
+The Futurama Holiday Spectacular
+The Honking
+The Late Philip J. Fry
+The Mutants Are Revolting
+The Prisoner of Benda
+The Problem with Popplers
+The Route of All Evil
+The Series Has Landed
+The Silence of the Clamps
+The Six Million Dollar Mon
+The Sting
+The Thief of Baghead
+The Tip of the Zoidberg
+The Why of Fry
+Three Hundred Big Boys
+Time Keeps on Slipping
+Viva Mars Vegas
+War Is the H-Word
+When Aliens Attack
+Where No Fan Has Gone Before
+Where the Buggalo Roam
+Why Must I Be a Crustacean in Love
+Xmas Story
+Yo Leela Leela
+Zapp Dingbat
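
For reference, this is a sketch of the HiveQL sequence exercised by the golden file above, reconstructed from its PREHOOK/POSTHOOK query entries. It is not the complete .q script: the creation of the futurama source table, the futurama_partitioned table, and the inserts for seasons 1 through 4 happen earlier in the test and are assumed here.

-- Switch the partitioned table to the column-aware Avro formats added by this patch.
ALTER TABLE futurama_partitioned SET FILEFORMAT
  INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroColumnInputFormat'
  OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroColumnOutputFormat';

-- Dynamic-partition insert of the remaining seasons.
INSERT OVERWRITE TABLE futurama_partitioned PARTITION (season)
SELECT id, episode, title, season FROM futurama where season > 4;

-- Read back every column from every partition.
SELECT * FROM futurama_partitioned;

-- Want an MR job (verbatim from the test); the single-column projection
-- presumably exercises the selective-deserialization path through the new input format.
SELECT distinct(title) AS unique_titles FROM futurama_partitioned;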