diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java index 75394f3..5f9a90b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java @@ -53,6 +53,8 @@ import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.ql.io.RCFileInputFormat; import org.apache.hadoop.hive.ql.io.RCFileOutputFormat; +import org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat; +import org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcSerde; @@ -73,6 +75,7 @@ import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.session.SessionState.LogHelper; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.avro.AvroSerDe; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; @@ -144,6 +147,9 @@ protected static final String PARQUETFILE_INPUT = MapredParquetInputFormat.class.getName(); protected static final String PARQUETFILE_OUTPUT = MapredParquetOutputFormat.class.getName(); protected static final String PARQUETFILE_SERDE = ParquetHiveSerDe.class.getName(); + protected static final String AVROFILE_INPUT = AvroContainerInputFormat.class.getName(); + protected static final String AVROFILE_OUTPUT = AvroContainerOutputFormat.class.getName(); + protected static final String AVROFILE_SERDE = AvroSerDe.class.getName(); public HiveOperation getHiveOperation() { return hiveOperation; @@ -250,6 +256,12 @@ protected boolean fillStorageFormat(ASTNode child, AnalyzeCreateCommonVars share shared.serde = PARQUETFILE_SERDE; storageFormat = true; break; + case HiveParser.TOK_TBLAVROFILE: + inputFormat = AVROFILE_INPUT; + outputFormat = AVROFILE_OUTPUT; + shared.serde = AVROFILE_SERDE; + storageFormat = true; + break; case HiveParser.TOK_TABLEFILEFORMAT: inputFormat = unescapeSQLString(child.getChild(0).getText()); outputFormat = unescapeSQLString(child.getChild(1).getText()); @@ -285,6 +297,10 @@ protected void fillDefaultStorageFormat(AnalyzeCreateCommonVars shared) { inputFormat = PARQUETFILE_INPUT; outputFormat = PARQUETFILE_OUTPUT; shared.serde = PARQUETFILE_SERDE; + } else if ("AVRO".equalsIgnoreCase(conf.getVar(HiveConf.ConfVars.HIVEDEFAULTFILEFORMAT))) { + inputFormat = AVROFILE_INPUT; + outputFormat = AVROFILE_OUTPUT; + shared.serde = AVROFILE_SERDE; } else { inputFormat = TEXTFILE_INPUT; outputFormat = TEXTFILE_OUTPUT; @@ -931,7 +947,7 @@ public TableAccessInfo getTableAccessInfo() { /** * Sets the table access information. * - * @param taInfo The TableAccessInfo structure that is set in the optimization phase. + * @param tableAccessInfo The TableAccessInfo structure that is set in the optimization phase. 
*/ public void setTableAccessInfo(TableAccessInfo tableAccessInfo) { this.tableAccessInfo = tableAccessInfo; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index 6cd1f39..c546833 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -1373,6 +1373,11 @@ private void analyzeAlterTableFileFormat(ASTNode ast, String tableName, outputFormat = PARQUETFILE_OUTPUT; serde = PARQUETFILE_SERDE; break; + case HiveParser.TOK_TBLAVROFILE: + inputFormat = AVROFILE_INPUT; + outputFormat = AVROFILE_OUTPUT; + serde = AVROFILE_SERDE; + break; case HiveParser.TOK_FILEFORMAT_GENERIC: handleGenericFileFormat(child); break; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index 412a046..481d8b6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -156,6 +156,7 @@ KW_TEXTFILE: 'TEXTFILE'; KW_RCFILE: 'RCFILE'; KW_ORCFILE: 'ORC'; KW_PARQUETFILE: 'PARQUET'; +KW_AVROFILE: 'AVRO'; KW_INPUTFORMAT: 'INPUTFORMAT'; KW_OUTPUTFORMAT: 'OUTPUTFORMAT'; KW_INPUTDRIVER: 'INPUTDRIVER'; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index f934ac4..0c02632 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -184,6 +184,7 @@ TOK_TABLEROWFORMATLINES; TOK_TABLEROWFORMATNULL; TOK_TBLORCFILE; TOK_TBLPARQUETFILE; +TOK_TBLAVROFILE; TOK_TBLSEQUENCEFILE; TOK_TBLTEXTFILE; TOK_TBLRCFILE; @@ -1270,6 +1271,7 @@ fileFormat | KW_RCFILE -> ^(TOK_TBLRCFILE) | KW_ORCFILE -> ^(TOK_TBLORCFILE) | KW_PARQUETFILE -> ^(TOK_TBLPARQUETFILE) + | KW_AVROFILE -> ^(TOK_TBLAVROFILE) | KW_INPUTFORMAT inFmt=StringLiteral KW_OUTPUTFORMAT outFmt=StringLiteral KW_SERDE serdeCls=StringLiteral (KW_INPUTDRIVER inDriver=StringLiteral KW_OUTPUTDRIVER outDriver=StringLiteral)? -> ^(TOK_TABLEFILEFORMAT $inFmt $outFmt $serdeCls $inDriver? $outDriver?) | genericSpec=identifier -> ^(TOK_FILEFORMAT_GENERIC $genericSpec) @@ -1813,6 +1815,7 @@ tableFileFormat | KW_STORED KW_AS KW_RCFILE -> TOK_TBLRCFILE | KW_STORED KW_AS KW_ORCFILE -> TOK_TBLORCFILE | KW_STORED KW_AS KW_PARQUETFILE -> TOK_TBLPARQUETFILE + | KW_STORED KW_AS KW_AVROFILE -> TOK_TBLAVROFILE | KW_STORED KW_AS KW_INPUTFORMAT inFmt=StringLiteral KW_OUTPUTFORMAT outFmt=StringLiteral (KW_INPUTDRIVER inDriver=StringLiteral KW_OUTPUTDRIVER outDriver=StringLiteral)? -> ^(TOK_TABLEFILEFORMAT $inFmt $outFmt $inDriver? $outDriver?) 
| KW_STORED KW_BY storageHandler=StringLiteral diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 9c001c1..21b62a3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -538,5 +538,5 @@ functionIdentifier nonReserved : - KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_PARQUETFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS | KW_REWRITE | KW_AUTHORIZATION + KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | 
KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_SEQUENCEFILE | KW_TEXTFILE | KW_RCFILE | KW_ORCFILE | KW_PARQUETFILE | KW_AVROFILE | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS | KW_REWRITE | KW_AUTHORIZATION
 ;
diff --git ql/src/test/queries/clientpositive/avro_compression_enabled_native.q ql/src/test/queries/clientpositive/avro_compression_enabled_native.q
new file mode 100644
index 0000000..c579633
--- /dev/null
+++ ql/src/test/queries/clientpositive/avro_compression_enabled_native.q
@@ -0,0 +1,14 @@
+-- verify that a native avro table can be created and loaded while output compression is enabled
+
+CREATE TABLE doctors4
+(number int,
+first_name string,
+last_name string,
+extra_field string)
+STORED AS AVRO;
+
+LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4;
+
+set hive.exec.compress.output=true;
+
+select count(*) from src;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/avro_decimal_native.q ql/src/test/queries/clientpositive/avro_decimal_native.q
new file mode 100644
index 0000000..31058fb
--- /dev/null
+++ ql/src/test/queries/clientpositive/avro_decimal_native.q
@@ -0,0 +1,40 @@
+DROP TABLE IF EXISTS dec;
+
+CREATE TABLE dec(name string, value decimal(8,4));
+
+LOAD DATA LOCAL INPATH '../../data/files/dec.txt' into TABLE dec;
+
+ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value;
+DESC FORMATTED dec value;
+
+DROP TABLE IF EXISTS avro_dec;
+
+CREATE TABLE avro_dec(
+  name string COMMENT 'from
deserializer', + value decimal(5,2) COMMENT 'from deserializer') +COMMENT 'just drop the schema right into the HQL' +STORED AS AVRO; + +DESC avro_dec; + +INSERT OVERWRITE TABLE avro_dec select name, value from dec; + +SELECT * FROM avro_dec; + +DROP TABLE IF EXISTS avro_dec1; + +CREATE TABLE avro_dec1( + name string COMMENT 'from deserializer', + value decimal(4,1) COMMENT 'from deserializer') +COMMENT 'just drop the schema right into the HQL' +STORED AS AVRO; + +DESC avro_dec1; + +LOAD DATA LOCAL INPATH '../../data/files/dec.avro' into TABLE avro_dec1; + +select value from avro_dec1; + +DROP TABLE dec; +DROP TABLE avro_dec; +DROP TABLE avro_dec1; diff --git ql/src/test/queries/clientpositive/avro_joins_native.q ql/src/test/queries/clientpositive/avro_joins_native.q new file mode 100644 index 0000000..8370212 --- /dev/null +++ ql/src/test/queries/clientpositive/avro_joins_native.q @@ -0,0 +1,26 @@ +-- SORT_QUERY_RESULTS + +-- verify that new joins bring in correct schemas (including evolved schemas) + +CREATE TABLE doctors4 +(number int COMMENT "Order of playing the role", + first_name string COMMENT "first name of actor playing role", + last_name string COMMENT "last name of actor playing role") +STORED AS AVRO; + +DESCRIBE doctors4; + +LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4; + +CREATE TABLE episodes +(title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +STORED AS AVRO; + +DESCRIBE episodes; + +LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes; + +SELECT e.title, e.air_date, d.first_name, d.last_name, e.air_date +FROM doctors4 d JOIN episodes e ON (d.number=e.doctor); \ No newline at end of file diff --git ql/src/test/queries/clientpositive/avro_native.q ql/src/test/queries/clientpositive/avro_native.q new file mode 100644 index 0000000..ee32f52 --- /dev/null +++ ql/src/test/queries/clientpositive/avro_native.q @@ -0,0 +1,14 @@ +-- SORT_QUERY_RESULTS + +-- verify that we can actually read avro files +CREATE TABLE doctors +(number int, +first_name string, +last_name string) +STORED AS AVRO; + +DESCRIBE doctors; + +LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors; + +SELECT * FROM doctors; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/avro_partitioned_native.q ql/src/test/queries/clientpositive/avro_partitioned_native.q new file mode 100644 index 0000000..d005e7e --- /dev/null +++ ql/src/test/queries/clientpositive/avro_partitioned_native.q @@ -0,0 +1,28 @@ +-- SORT_QUERY_RESULTS +-- Verify that table scans work with partitioned Avro tables +CREATE TABLE episodes +(title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +STORED AS AVRO; + +LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes; + +CREATE TABLE episodes_partitioned +(title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +PARTITIONED BY (doctor_pt INT) +STORED AS AVRO; + +SET hive.exec.dynamic.partition.mode=nonstrict; +INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes; + +SELECT * FROM episodes_partitioned WHERE doctor_pt > 6; + +-- Verify that Fetch works in addition to Map +SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5; +-- 
Fetch w/filter to specific partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 6;
+-- Fetch w/non-existent partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/avro_compression_enabled_native.q.out ql/src/test/results/clientpositive/avro_compression_enabled_native.q.out
new file mode 100644
index 0000000..08a1944
--- /dev/null
+++ ql/src/test/results/clientpositive/avro_compression_enabled_native.q.out
@@ -0,0 +1,38 @@
+PREHOOK: query: -- verify that a native avro table can be created and loaded while output compression is enabled
+
+CREATE TABLE doctors4
+(number int,
+first_name string,
+last_name string,
+extra_field string)
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: -- verify that a native avro table can be created and loaded while output compression is enabled
+
+CREATE TABLE doctors4
+(number int,
+first_name string,
+last_name string,
+extra_field string)
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@doctors4
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@doctors4
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@doctors4
+PREHOOK: query: select count(*) from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select count(*) from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+500
diff --git ql/src/test/results/clientpositive/avro_decimal_native.q.out ql/src/test/results/clientpositive/avro_decimal_native.q.out
new file mode 100644
index 0000000..268ebf6
--- /dev/null
+++ ql/src/test/results/clientpositive/avro_decimal_native.q.out
@@ -0,0 +1,168 @@
+PREHOOK: query: DROP TABLE IF EXISTS dec
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE IF EXISTS dec
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE dec(name string, value decimal(8,4))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE dec(name string, value decimal(8,4))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dec
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.txt' into TABLE dec
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@dec
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.txt' into TABLE dec
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@dec
+PREHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dec
+#### A masked pattern was here ####
+POSTHOOK: query: ANALYZE TABLE dec COMPUTE STATISTICS FOR COLUMNS value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dec
+#### A masked pattern was here ####
+PREHOOK: query: DESC FORMATTED dec value
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@dec
+POSTHOOK: query: DESC FORMATTED dec value
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@dec
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
+
+value decimal(8,4) -12.25 234.79 0 6 from deserializer
+PREHOOK: query: DROP TABLE IF EXISTS avro_dec
+PREHOOK: type:
DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS avro_dec +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE avro_dec( + name string COMMENT 'from deserializer', + value decimal(5,2) COMMENT 'from deserializer') +COMMENT 'just drop the schema right into the HQL' +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE avro_dec( + name string COMMENT 'from deserializer', + value decimal(5,2) COMMENT 'from deserializer') +COMMENT 'just drop the schema right into the HQL' +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_dec +PREHOOK: query: DESC avro_dec +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_dec +POSTHOOK: query: DESC avro_dec +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_dec +name string from deserializer +value decimal(5,2) from deserializer +PREHOOK: query: INSERT OVERWRITE TABLE avro_dec select name, value from dec +PREHOOK: type: QUERY +PREHOOK: Input: default@dec +PREHOOK: Output: default@avro_dec +POSTHOOK: query: INSERT OVERWRITE TABLE avro_dec select name, value from dec +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dec +POSTHOOK: Output: default@avro_dec +POSTHOOK: Lineage: avro_dec.name SIMPLE [(dec)dec.FieldSchema(name:name, type:string, comment:null), ] +POSTHOOK: Lineage: avro_dec.value EXPRESSION [(dec)dec.FieldSchema(name:value, type:decimal(8,4), comment:null), ] +PREHOOK: query: SELECT * FROM avro_dec +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_dec +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM avro_dec +POSTHOOK: type: QUERY +POSTHOOK: Input: default@avro_dec +#### A masked pattern was here #### +Tom 234.79 +Beck 77.34 +Snow 55.71 +Mary 4.33 +Cluck 5.96 +Tom -12.25 +Mary 33.33 +Tom 19 +Beck 0 +Beck 79.9 +PREHOOK: query: DROP TABLE IF EXISTS avro_dec1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS avro_dec1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE avro_dec1( + name string COMMENT 'from deserializer', + value decimal(4,1) COMMENT 'from deserializer') +COMMENT 'just drop the schema right into the HQL' +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE avro_dec1( + name string COMMENT 'from deserializer', + value decimal(4,1) COMMENT 'from deserializer') +COMMENT 'just drop the schema right into the HQL' +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@avro_dec1 +PREHOOK: query: DESC avro_dec1 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@avro_dec1 +POSTHOOK: query: DESC avro_dec1 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@avro_dec1 +name string from deserializer +value decimal(4,1) from deserializer +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.avro' into TABLE avro_dec1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@avro_dec1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dec.avro' into TABLE avro_dec1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@avro_dec1 +PREHOOK: query: select value from avro_dec1 +PREHOOK: type: QUERY +PREHOOK: Input: default@avro_dec1 +#### A masked pattern was here #### +POSTHOOK: query: select value from avro_dec1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@avro_dec1 +#### A masked pattern was here #### +234.8 +77.3 +55.7 +4.3 +6 +12.3 +33.3 +19 +3.2 +79.9 +PREHOOK: query: DROP TABLE dec +PREHOOK: 
type: DROPTABLE +PREHOOK: Input: default@dec +PREHOOK: Output: default@dec +POSTHOOK: query: DROP TABLE dec +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dec +POSTHOOK: Output: default@dec +PREHOOK: query: DROP TABLE avro_dec +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@avro_dec +PREHOOK: Output: default@avro_dec +POSTHOOK: query: DROP TABLE avro_dec +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@avro_dec +POSTHOOK: Output: default@avro_dec +PREHOOK: query: DROP TABLE avro_dec1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@avro_dec1 +PREHOOK: Output: default@avro_dec1 +POSTHOOK: query: DROP TABLE avro_dec1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@avro_dec1 +POSTHOOK: Output: default@avro_dec1 diff --git ql/src/test/results/clientpositive/avro_joins_native.q.out ql/src/test/results/clientpositive/avro_joins_native.q.out new file mode 100644 index 0000000..b7fdafc --- /dev/null +++ ql/src/test/results/clientpositive/avro_joins_native.q.out @@ -0,0 +1,92 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +-- verify that new joins bring in correct schemas (including evolved schemas) + +CREATE TABLE doctors4 +(number int COMMENT "Order of playing the role", + first_name string COMMENT "first name of actor playing role", + last_name string COMMENT "last name of actor playing role") +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- SORT_QUERY_RESULTS + +-- verify that new joins bring in correct schemas (including evolved schemas) + +CREATE TABLE doctors4 +(number int COMMENT "Order of playing the role", + first_name string COMMENT "first name of actor playing role", + last_name string COMMENT "last name of actor playing role") +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@doctors4 +PREHOOK: query: DESCRIBE doctors4 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@doctors4 +POSTHOOK: query: DESCRIBE doctors4 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@doctors4 +number int from deserializer +first_name string from deserializer +last_name string from deserializer +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@doctors4 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' INTO TABLE doctors4 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@doctors4 +PREHOOK: query: CREATE TABLE episodes +(title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE episodes +(title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@episodes +PREHOOK: query: DESCRIBE episodes +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@episodes +POSTHOOK: query: DESCRIBE episodes +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@episodes +title string from deserializer +air_date string from deserializer +doctor int from deserializer +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: 
default@episodes +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@episodes +PREHOOK: query: SELECT e.title, e.air_date, d.first_name, d.last_name, e.air_date +FROM doctors4 d JOIN episodes e ON (d.number=e.doctor) +PREHOOK: type: QUERY +PREHOOK: Input: default@doctors4 +PREHOOK: Input: default@episodes +#### A masked pattern was here #### +POSTHOOK: query: SELECT e.title, e.air_date, d.first_name, d.last_name, e.air_date +FROM doctors4 d JOIN episodes e ON (d.number=e.doctor) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@doctors4 +POSTHOOK: Input: default@episodes +#### A masked pattern was here #### +An Unearthly Child 23 November 1963 William Hartnell 23 November 1963 +Castrolava 4 January 1982 Peter Davison 4 January 1982 +Horror of Fang Rock 3 September 1977 Tom Baker 3 September 1977 +Rose 26 March 2005 Christopher Eccleston 26 March 2005 +The Doctor's Wife 14 May 2011 Matt Smith 14 May 2011 +The Eleventh Hour 3 April 2010 Matt Smith 3 April 2010 +The Mysterious Planet 6 September 1986 Colin Baker 6 September 1986 +The Power of the Daleks 5 November 1966 Patrick Troughton 5 November 1966 diff --git ql/src/test/results/clientpositive/avro_partitioned_native.q.out ql/src/test/results/clientpositive/avro_partitioned_native.q.out new file mode 100644 index 0000000..3643e0f --- /dev/null +++ ql/src/test/results/clientpositive/avro_partitioned_native.q.out @@ -0,0 +1,146 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- Verify that table scans work with partitioned Avro tables +CREATE TABLE episodes +(title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: -- SORT_QUERY_RESULTS +-- Verify that table scans work with partitioned Avro tables +CREATE TABLE episodes +(title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@episodes +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@episodes +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/episodes.avro' INTO TABLE episodes +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@episodes +PREHOOK: query: CREATE TABLE episodes_partitioned +(title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +PARTITIONED BY (doctor_pt INT) +STORED AS AVRO +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE episodes_partitioned +(title string COMMENT "episode title", + air_date string COMMENT "initial date", + doctor int COMMENT "main actor playing the Doctor in episode") +PARTITIONED BY (doctor_pt INT) +STORED AS AVRO +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@episodes_partitioned +PREHOOK: query: INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes +PREHOOK: Output: 
default@episodes_partitioned +POSTHOOK: query: INSERT OVERWRITE TABLE episodes_partitioned PARTITION (doctor_pt) SELECT title, air_date, doctor, doctor as doctor_pt FROM episodes +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=1 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=11 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=2 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=4 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=5 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=6 +POSTHOOK: Output: default@episodes_partitioned@doctor_pt=9 +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=11).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=1).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=2).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=4).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=5).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=6).title 
SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).air_date SIMPLE [(episodes)episodes.FieldSchema(name:air_date, type:string, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).doctor SIMPLE [(episodes)episodes.FieldSchema(name:doctor, type:int, comment:from deserializer), ] +POSTHOOK: Lineage: episodes_partitioned PARTITION(doctor_pt=9).title SIMPLE [(episodes)episodes.FieldSchema(name:title, type:string, comment:from deserializer), ] +PREHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Input: default@episodes_partitioned@doctor_pt=11 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM episodes_partitioned WHERE doctor_pt > 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +Rose 26 March 2005 9 9 +The Doctor's Wife 14 May 2011 11 11 +The Eleventh Hour 3 April 2010 11 11 +PREHOOK: query: -- Verify that Fetch works in addition to Map +SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Input: default@episodes_partitioned@doctor_pt=1 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=11 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=2 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=4 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=5 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=6 +PREHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +POSTHOOK: query: -- Verify that Fetch works in addition to Map +SELECT * FROM episodes_partitioned ORDER BY air_date LIMIT 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=1 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=11 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=2 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=4 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=5 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6 +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=9 +#### A masked pattern was here #### +An Unearthly Child 23 November 1963 1 1 +Horror of Fang Rock 3 September 1977 4 4 +Rose 26 March 2005 9 9 +The Doctor's Wife 14 May 2011 11 11 +The Eleventh Hour 3 April 2010 11 11 +PREHOOK: query: -- Fetch w/filter to specific partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 6 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +PREHOOK: Input: default@episodes_partitioned@doctor_pt=6 +#### A masked pattern was here #### +POSTHOOK: query: -- Fetch w/filter to specific partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 6 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@episodes_partitioned +POSTHOOK: Input: default@episodes_partitioned@doctor_pt=6 +#### A masked pattern was here #### +The Mysterious Planet 6 September 1986 6 6 +PREHOOK: query: -- Fetch w/non-existent partition +SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@episodes_partitioned +#### A masked pattern 
was here ####
+POSTHOOK: query: -- Fetch w/non-existent partition
+SELECT * FROM episodes_partitioned WHERE doctor_pt = 7 LIMIT 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@episodes_partitioned
+#### A masked pattern was here ####
diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaGenerator.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaGenerator.java
new file mode 100644
index 0000000..4cb36a9
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaGenerator.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.avro;
+
+import org.apache.avro.Schema;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class AvroSchemaGenerator {
+  public Schema createAvroSchema(List<String> columnNames, List<TypeInfo> columnTypes, String namespace, String name, String doc) {
+
+    List<Schema.Field> fields = new ArrayList<Schema.Field>();
+    for (int i = 0; i < columnNames.size(); ++i) {
+      fields.addAll(getFields(columnTypes.get(i), createAvroField(columnNames.get(i), columnTypes.get(i))));
+    }
+    Schema avroSchema = Schema.createRecord(name, doc, namespace, false);
+    avroSchema.setFields(fields);
+
+    // Splice the namespace and name into the schema JSON and re-parse, so the
+    // returned schema always carries them explicitly.
+    String schemaString =
+        "{ " +
+        "\"namespace\": \"" + namespace + "\", " +
+        "\"name\": \"" + name + "\", " +
+        avroSchema.toString().substring(1);
+    avroSchema = Schema.parse(schemaString);
+    return avroSchema;
+  }
+
+  private Schema.Field createAvroField(String name, TypeInfo typeInfo) {
+    Schema.Field field = null;
+    switch(typeInfo.getCategory()) {
+    case PRIMITIVE:
+      field = createAvroPrimitive(name, typeInfo);
+      break;
+    case LIST:
+      field = createAvroArray(name, typeInfo);
+      break;
+    case MAP:
+      field = createAvroMap(name, typeInfo);
+      break;
+    case STRUCT:
+      field = createAvroRecord(name, typeInfo);
+      break;
+    case UNION:
+      field = createAvroUnion(name, typeInfo);
+      break;
+    }
+
+    return field;
+  }
+
+  private Schema.Field createAvroPrimitive(String name, TypeInfo typeInfo) {
+    PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
+    Schema.Field schema;
+    switch (primitiveTypeInfo.getPrimitiveCategory()) {
+    case STRING:
+      schema = new Schema.Field(name, Schema.create(Schema.Type.STRING), "", null);
+      break;
+    case BYTE:
+      // Assumption: Avro has no single-byte numeric type, so tinyint is widened to
+      // Avro int here (Schema.Type.BYTES would be a byte sequence, not a number).
+      schema = new Schema.Field(name, Schema.create(Schema.Type.INT), "", null);
+      break;
+    case INT:
+      schema = new Schema.Field(name, Schema.create(Schema.Type.INT), "", null);
+      break;
+    case LONG:
+      schema = new Schema.Field(name, Schema.create(Schema.Type.LONG), "", null);
+      break;
+    case FLOAT:
+      schema = new Schema.Field(name, Schema.create(Schema.Type.FLOAT), "", null);
+      break;
+    case DOUBLE:
+      schema = new Schema.Field(name, Schema.create(Schema.Type.DOUBLE), "", null);
+      break;
+    case BOOLEAN:
+      schema = new Schema.Field(name, Schema.create(Schema.Type.BOOLEAN), "", null);
+      break;
+    case DECIMAL:
+      DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
+      String precision = String.valueOf(decimalTypeInfo.precision());
+      String scale = String.valueOf(decimalTypeInfo.scale());
+      Schema decSchema =
+          Schema.parse("{\"type\":\"bytes\",\"logicalType\":\"decimal\",\"precision\":" +
+              precision + ",\"scale\":" + scale + "}");
+      schema = new Schema.Field(name, decSchema, "", null);
+      break;
+    default: //null
+      schema = new Schema.Field(name, Schema.create(Schema.Type.NULL), "", null);
+    }
+    return schema;
+  }
+
+  private Schema.Field createAvroUnion(String name, TypeInfo typeInfo) {
+    List<Schema> childSchemas = new ArrayList<Schema>();
+    for(TypeInfo childTypeInfo : ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos()) {
+      childSchemas.add(createAvroField("", childTypeInfo).schema());
+    }
+    return new Schema.Field(name, Schema.createUnion(childSchemas), "", null);
+  }
+
+  private Schema.Field createAvroRecord(String name, TypeInfo typeInfo) {
+    List<Schema.Field> childFields = new ArrayList<Schema.Field>();
+
+    final ArrayList<String> allStructFieldNames = ((StructTypeInfo) typeInfo).getAllStructFieldNames();
+    final ArrayList<TypeInfo> allStructFieldTypeInfos = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos();
+    if (allStructFieldNames.size() != allStructFieldTypeInfos.size()) {
+      throw new IllegalArgumentException("Failed to generate avro schema from hive schema. " +
+          "Number of field names and field types differ. names = " + allStructFieldNames +
+          ", types = " + allStructFieldTypeInfos);
+    }
+
+    for(int i = 0; i < allStructFieldNames.size(); ++i) {
+      final Schema.Field grandChildSchemaField = createAvroField(allStructFieldNames.get(i), allStructFieldTypeInfos.get(i));
+      final List<Schema.Field> grandChildFields = getFields(allStructFieldTypeInfos.get(i), grandChildSchemaField);
+      childFields.addAll(grandChildFields);
+    }
+    return new Schema.Field(name, Schema.createRecord(childFields), "", null);
+  }
+
+  private List<Schema.Field> getFields(TypeInfo typeInfo, Schema.Field schemaField) {
+    // Struct members are spliced directly into the enclosing record; any other type
+    // is re-wrapped in a fresh Field object before being added to a record.
+    return typeInfo.getCategory() == ObjectInspector.Category.STRUCT ?
+        schemaField.schema().getFields() :
+        Arrays.asList(new Schema.Field(schemaField.name(), schemaField.schema(), schemaField.doc(), null));
+  }
+
+  private Schema.Field createAvroMap(String name, TypeInfo typeInfo) {
+    // Avro map keys are always strings; only the value schema is generated.
+    TypeInfo valueTypeInfo = ((MapTypeInfo) typeInfo).getMapValueTypeInfo();
+    Schema valueSchema = createAvroField("", valueTypeInfo).schema();
+
+    return new Schema.Field(name, Schema.createMap(valueSchema), "", null);
+  }
+
+  private Schema.Field createAvroArray(String name, TypeInfo typeInfo) {
+    TypeInfo elementTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo();
+    Schema elementSchema = createAvroField("", elementTypeInfo).schema();
+    return new Schema.Field(name, Schema.createArray(elementSchema), "", null);
+  }
+}
\ No newline at end of file
diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
index 1fe31e0..c9ef909 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerDe.java
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.hive.serde2.avro;
 
+import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Properties;
 
@@ -29,6 +31,7 @@
 import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.Writable;
 
 /**
@@ -70,7 +73,41 @@ public void initialize(Configuration configuration, Properties properties) throw
     columnNames = null;
     columnTypes = null;
 
-    schema = AvroSerdeUtils.determineSchemaOrReturnErrorSchema(properties);
+    if(properties.getProperty(AvroSerdeUtils.SCHEMA_LITERAL) != null ||
+        properties.getProperty(AvroSerdeUtils.SCHEMA_URL) != null) {
+      schema = AvroSerdeUtils.determineSchemaOrReturnErrorSchema(properties);
+    } else {
+      // Get column names and column types from the table properties
+      final String columnNameProperty = properties.getProperty("columns");
+      final String columnTypeProperty = properties.getProperty("columns.types");
+
+      if (columnNameProperty.length() == 0) {
+        columnNames = new ArrayList<String>();
+      } else {
+        columnNames = Arrays.asList(columnNameProperty.split(","));
+      }
+      if (columnTypeProperty.length() == 0) {
+        columnTypes = new ArrayList<TypeInfo>();
+      } else {
+        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+      }
+      if (columnNames.size() != columnTypes.size()) {
+        throw new IllegalArgumentException("AvroSerde initialization failed. Number of column " +
+            "names and column types differ. columnNames = " + columnNames + ", columnTypes = " +
+            columnTypes);
+      }
+
+      AvroSchemaGenerator avroSchemaGenerator = new AvroSchemaGenerator();
+      schema = avroSchemaGenerator.createAvroSchema(columnNames, columnTypes,
+          properties.getProperty("avro.schema.namespace"),
+          properties.getProperty("avro.schema.name"),
+          properties.getProperty("avro.schema.doc"));
+
+      // Record the generated schema so downstream consumers see an explicit literal.
+      properties.setProperty(AvroSerdeUtils.SCHEMA_LITERAL, schema.toString());
+    }
+
+    LOG.info("Schema is " + schema);
+
     if(configuration == null) {
       LOG.info("Configuration null, not inserting schema");
     } else {
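
Reviewer note: a minimal sketch of the DDL surface this patch enables, for anyone trying it out. Table and column names below are hypothetical; the statements map to the new TOK_TBLAVROFILE case in fillStorageFormat, the new case in analyzeAlterTableFileFormat, and the new hive.default.fileformat branch in fillDefaultStorageFormat, respectively:

-- hypothetical table; exercises the new STORED AS AVRO shorthand
CREATE TABLE t_avro (id int, name string) STORED AS AVRO;

-- exercises the new ALTER TABLE file-format case
ALTER TABLE t_avro SET FILEFORMAT AVRO;

-- exercises the new default-file-format branch
SET hive.default.fileformat=AVRO;
CREATE TABLE t_avro_default (id int, name string);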
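A hedged sketch of the complex-type coverage: the hypothetical table below would route its columns through createAvroArray, createAvroMap, createAvroRecord and createAvroUnion in AvroSchemaGenerator. Note that Avro maps always key on strings, and that getFields splices a struct's members into the enclosing record rather than keeping a nested field. None of the new .q tests exercise these branches, so this is a candidate for follow-up tests rather than behavior the patch demonstrates:

-- hypothetical table covering the LIST, MAP, STRUCT and UNION paths
CREATE TABLE t_complex (
  id int,
  tags array<string>,
  props map<string,string>,
  addr struct<street:string, zip:int>,
  val uniontype<int, string>)
STORED AS AVRO;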
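Backward compatibility: the new AvroSerDe.initialize() only generates a schema when neither avro.schema.literal nor avro.schema.url is present, deferring to AvroSerdeUtils.determineSchemaOrReturnErrorSchema otherwise, and it writes the generated schema back into avro.schema.literal. The pre-existing explicit form therefore keeps working unchanged; a sketch with the schema JSON elided and a hypothetical table name:

CREATE TABLE episodes_explicit
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
TBLPROPERTIES ('avro.schema.literal'='{...}');  -- schema body elided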