diff --git ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
index 60b4388..22e54d3 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/avro/AvroGenericRecordReader.java
@@ -127,7 +127,7 @@ private Schema getSchema(JobConf job, FileSplit split) throws AvroSerdeException
     String s = job.get(AvroSerdeUtils.AVRO_SERDE_SCHEMA);
     if(s != null) {
       LOG.info("Found the avro schema in the job: " + s);
-      return Schema.parse(s);
+      return AvroSerdeUtils.getSchemaFor(s);
     }
     // No more places to get the schema from. Give up. May have to re-encode later.
     return null;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
index f448b16..9463ef1 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/FromClauseParser.g
@@ -144,7 +144,7 @@ fromSource
 @init { gParent.pushMsg("from source", state); }
 @after { gParent.popMsg(state); }
     :
-    ((Identifier LPAREN)=> partitionedTableFunction | tableSource | subQuerySource) (lateralView^)*
+    ((Identifier LPAREN)=> partitionedTableFunction | tableSource | subQuerySource | virtualTableSource) (lateralView^)*
     ;

 tableBucketSample
@@ -256,3 +256,46 @@ searchCondition
     ;

 //-----------------------------------------------------------------------------------
+
+//-------- Row Constructor ----------------------------------------------------------
+//in support of SELECT * FROM (VALUES(1,2,3),(4,5,6),...) as FOO(a,b,c) and
+// INSERT INTO <table> (col1,col2,...) VALUES(...),(...),...
+// INSERT INTO <table> (col1,col2,...) SELECT * FROM (VALUES(1,2,3),(4,5,6),...) as Foo(a,b,c)
+valueRowConstructor
+    :
+    LPAREN atomExpression (COMMA atomExpression)* RPAREN -> ^(TOK_VALUE_ROW atomExpression+)
+    ;
+
+valuesTableConstructor
+    :
+    valueRowConstructor (COMMA valueRowConstructor)* -> ^(TOK_VALUES_TABLE valueRowConstructor+)
+    ;
+
+/*
+VALUES(1),(2) means 2 rows, 1 column each.
+VALUES(1,2),(3,4) means 2 rows, 2 columns each.
+VALUES(1,2,3) means 1 row, 3 columns
+*/
+valuesClause
+    :
+    KW_VALUES valuesTableConstructor -> valuesTableConstructor
+    ;
+
+/*
+This represents a clause like this:
+(VALUES(1,2),(2,3)) as VirtTable(col1,col2)
+*/
+virtualTableSource
+    :
+    LPAREN valuesClause RPAREN tableNameColList -> ^(TOK_VIRTUAL_TABLE tableNameColList valuesClause)
+    ;
+/*
+e.g. as VirtTable(col1,col2)
+Note that we only want literals as column names
+*/
+tableNameColList
+    :
+    KW_AS? identifier LPAREN identifier (COMMA identifier)* RPAREN -> ^(TOK_VIRTUAL_TABREF ^(TOK_TABNAME identifier) ^(TOK_COL_NAME identifier+))
+    ;
+
+//-----------------------------------------------------------------------------------
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
index 20334ac..ce05fff 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g
@@ -292,6 +292,7 @@ KW_TRANSACTIONS: 'TRANSACTIONS';
 KW_REWRITE : 'REWRITE';
 KW_AUTHORIZATION: 'AUTHORIZATION';
 KW_CONF: 'CONF';
+KW_VALUES: 'VALUES';

 // Operators
 // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work.
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index a76cad7..32db0c7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -331,6 +331,15 @@ TOK_RESOURCE_LIST;
 TOK_COMPACT;
 TOK_SHOW_COMPACTIONS;
 TOK_SHOW_TRANSACTIONS;
+TOK_DELETE_FROM;
+TOK_UPDATE_TABLE;
+TOK_SET_COLUMNS_CLAUSE;
+TOK_VALUE_ROW;
+TOK_VALUES_TABLE;
+TOK_VIRTUAL_TABLE;
+TOK_VIRTUAL_TABREF;
+TOK_ANONYMOUS;
+TOK_COL_NAME;
 }

@@ -469,6 +478,9 @@ import java.util.HashMap;
     xlateMap.put("KW_DEFINED", "DEFINED");
     xlateMap.put("KW_SUBQUERY", "SUBQUERY");
     xlateMap.put("KW_REWRITE", "REWRITE");
+    xlateMap.put("KW_UPDATE", "UPDATE");
+
+    xlateMap.put("KW_VALUES", "VALUES");

     // Operators
     xlateMap.put("DOT", ".");
@@ -638,6 +650,8 @@ execStatement
     | exportStatement
     | importStatement
     | ddlStatement
+    | deleteStatement
+    | updateStatement
     ;

 loadStatement
@@ -2095,11 +2109,28 @@ singleFromStatement
     ( b+=body )+ -> ^(TOK_QUERY fromClause body+)
     ;

+/*
+The valuesClause rule below ensures that the parse tree for
+"insert into table FOO values (1,2),(3,4)" looks the same as
+"insert into table FOO select a,b from (values(1,2),(3,4)) as BAR(a,b)" which itself is made to look
+very similar to the tree for "insert into table FOO select a,b from BAR". Since virtual table name
+is implicit, it's represented as TOK_ANONYMOUS.
+*/
 regularBody[boolean topLevel]
    :
    i=insertClause
+   (
    s=selectStatement[topLevel]
      {$s.tree.getChild(1).replaceChildren(0, 0, $i.tree);} -> {$s.tree}
+     |
+     valuesClause
+        -> ^(TOK_QUERY
+              ^(TOK_FROM
+                ^(TOK_VIRTUAL_TABLE ^(TOK_VIRTUAL_TABREF ^(TOK_ANONYMOUS)) valuesClause)
+              )
+              ^(TOK_INSERT {$i.tree} ^(TOK_SELECT ^(TOK_SELEXPR TOK_ALLCOLREF)))
+           )
+   )
    |
    selectStatement[topLevel]
    ;
@@ -2208,3 +2239,34 @@ limitClause
    :
    KW_LIMIT num=Number -> ^(TOK_LIMIT $num)
    ;
+
+//DELETE FROM <tableName> WHERE ...;
+deleteStatement
+@init { pushMsg("delete statement", state); }
+@after { popMsg(state); }
+   :
+   KW_DELETE KW_FROM tableName (whereClause)? -> ^(TOK_DELETE_FROM tableName whereClause?)
+   ;
+
+/*SET <columName> = (3 + col2)*/
+columnAssignmentClause
+   :
+   tableOrColumn EQUAL^ atomExpression
+   ;
+
+/*SET col1 = 5, col2 = (4 + col4), ...*/
+setColumnsClause
+   :
+   KW_SET columnAssignmentClause (COMMA columnAssignmentClause)* -> ^(TOK_SET_COLUMNS_CLAUSE columnAssignmentClause* )
+   ;
+
+/*
+  UPDATE <table>
+  SET col1 = val1, col2 = val2... WHERE ...
+*/
+updateStatement
+@init { pushMsg("update statement", state); }
+@after { popMsg(state); }
+   :
+   KW_UPDATE tableName setColumnsClause whereClause? -> ^(TOK_UPDATE_TABLE tableName setColumnsClause whereClause?)
+   ;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
index 75897b8..34d2dfc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g
@@ -538,5 +538,5 @@ functionIdentifier

 nonReserved
     :
-    KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS | KW_REWRITE | KW_AUTHORIZATION
+    KW_TRUE | KW_FALSE | KW_LIKE | KW_EXISTS | KW_ASC | KW_DESC | KW_ORDER | KW_GROUP | KW_BY | KW_AS | KW_INSERT | KW_OVERWRITE | KW_OUTER | KW_LEFT | KW_RIGHT | KW_FULL | KW_PARTITION | KW_PARTITIONS | KW_TABLE | KW_TABLES | KW_COLUMNS | KW_INDEX | KW_INDEXES | KW_REBUILD | KW_FUNCTIONS | KW_SHOW | KW_MSCK | KW_REPAIR | KW_DIRECTORY | KW_LOCAL | KW_USING | KW_CLUSTER | KW_DISTRIBUTE | KW_SORT | KW_UNION | KW_LOAD | KW_EXPORT | KW_IMPORT | KW_DATA | KW_INPATH | KW_IS | KW_NULL | KW_CREATE | KW_EXTERNAL | KW_ALTER | KW_CHANGE | KW_FIRST | KW_AFTER | KW_DESCRIBE | KW_DROP | KW_RENAME | KW_IGNORE | KW_PROTECTION | KW_TO | KW_COMMENT | KW_BOOLEAN | KW_TINYINT | KW_SMALLINT | KW_INT | KW_BIGINT | KW_FLOAT | KW_DOUBLE | KW_DATE | KW_DATETIME | KW_TIMESTAMP | KW_DECIMAL | KW_STRING | KW_ARRAY | KW_STRUCT | KW_UNIONTYPE | KW_PARTITIONED | KW_CLUSTERED | KW_SORTED | KW_INTO | KW_BUCKETS | KW_ROW | KW_ROWS | KW_FORMAT | KW_DELIMITED | KW_FIELDS | KW_TERMINATED | KW_ESCAPED | KW_COLLECTION | KW_ITEMS | KW_KEYS | KW_KEY_TYPE | KW_LINES | KW_STORED | KW_FILEFORMAT | KW_INPUTFORMAT | KW_OUTPUTFORMAT | KW_INPUTDRIVER | KW_OUTPUTDRIVER | KW_OFFLINE | KW_ENABLE | KW_DISABLE | KW_READONLY | KW_NO_DROP | KW_LOCATION | KW_BUCKET | KW_OUT | KW_OF | KW_PERCENT | KW_ADD | KW_REPLACE | KW_RLIKE | KW_REGEXP | KW_TEMPORARY | KW_EXPLAIN | KW_FORMATTED | KW_PRETTY | KW_DEPENDENCY | KW_LOGICAL | KW_SERDE | KW_WITH | KW_DEFERRED | KW_SERDEPROPERTIES | KW_DBPROPERTIES | KW_LIMIT | KW_SET | KW_UNSET | KW_TBLPROPERTIES | KW_IDXPROPERTIES | KW_VALUE_TYPE | KW_ELEM_TYPE | KW_MAPJOIN | KW_STREAMTABLE | KW_HOLD_DDLTIME | KW_CLUSTERSTATUS | KW_UTC | KW_UTCTIMESTAMP | KW_LONG | KW_DELETE | KW_PLUS | KW_MINUS | KW_FETCH | KW_INTERSECT | KW_VIEW | KW_IN | KW_DATABASES | KW_MATERIALIZED | KW_SCHEMA | KW_SCHEMAS | KW_GRANT | KW_REVOKE | KW_SSL | KW_UNDO | KW_LOCK | KW_LOCKS | KW_UNLOCK | KW_SHARED | KW_EXCLUSIVE | KW_PROCEDURE | KW_UNSIGNED | KW_WHILE | KW_READ | KW_READS | KW_PURGE | KW_RANGE | KW_ANALYZE | KW_BEFORE | KW_BETWEEN | KW_BOTH | KW_BINARY | KW_CONTINUE | KW_CURSOR | KW_TRIGGER | KW_RECORDREADER | KW_RECORDWRITER | KW_SEMI | KW_LATERAL | KW_TOUCH | KW_ARCHIVE | KW_UNARCHIVE | KW_COMPUTE | KW_STATISTICS | KW_USE | KW_OPTION | KW_CONCATENATE | KW_SHOW_DATABASE | KW_UPDATE | KW_RESTRICT | KW_CASCADE | KW_SKEWED | KW_ROLLUP | KW_CUBE | KW_DIRECTORIES | KW_FOR | KW_GROUPING | KW_SETS | KW_TRUNCATE | KW_NOSCAN | KW_USER | KW_ROLE | KW_ROLES | KW_INNER | KW_DEFINED | KW_ADMIN | KW_JAR | KW_FILE | KW_OWNER | KW_PRINCIPALS | KW_ALL | KW_DEFAULT | KW_NONE | KW_COMPACT | KW_COMPACTIONS | KW_TRANSACTIONS | KW_REWRITE | KW_AUTHORIZATION | KW_VALUES
     ;
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 7a71ec7..3b98932 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -972,6 +972,8 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1)
       ASTNode frm = (ASTNode) ast.getChild(0);
       if (frm.getToken().getType() == HiveParser.TOK_TABREF) {
         processTable(qb, frm);
+      } else if (frm.getToken().getType() == HiveParser.TOK_VIRTUAL_TABLE) {
+        throw new RuntimeException("VALUES() clause is not fully supported yet...");
       } else if (frm.getToken().getType() == HiveParser.TOK_SUBQUERY) {
         processSubQuery(qb, frm);
       } else if (frm.getToken().getType() == HiveParser.TOK_LATERAL_VIEW ||
@@ -1164,6 +1166,10 @@ public boolean doPhase1(ASTNode ast, QB qb, Phase1Ctx ctx_1)
       case HiveParser.TOK_CTE:
         processCTE(qb, ast);
         break;
+      case HiveParser.TOK_DELETE_FROM:
+        throw new RuntimeException("DELETE is not (yet) implemented...");
+      case HiveParser.TOK_UPDATE_TABLE:
+        throw new RuntimeException("UPDATE is not (yet) implemented...");
       default:
         skipRecursion = false;
         break;
diff --git ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java
index 4a6dc61..f5bc427 100644
--- ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java
+++ ql/src/java/org/apache/hadoop/hive/ql/processors/HiveCommand.java
@@ -49,6 +49,9 @@ public static HiveCommand find(String[] command) {
       if (command.length > 1 && "role".equalsIgnoreCase(command[1])) {
         // special handling for set role r1 statement
         return null;
+      } else if(command.length > 1 && "from".equalsIgnoreCase(command[1])) {
where..." + return null; } else if (COMMANDS.contains(cmd)) { return HiveCommand.valueOf(cmd); } diff --git ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java new file mode 100644 index 0000000..548215a --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/parse/TestIUD.java @@ -0,0 +1,221 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.parse; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.session.SessionState; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestIUD { + private static HiveConf conf; + + private ParseDriver pd; + private SemanticAnalyzer sA; + + @BeforeClass + public static void initialize() { + conf = new HiveConf(SemanticAnalyzer.class); + SessionState.start(conf); + } + + @Before + public void setup() throws SemanticException { + pd = new ParseDriver(); + sA = new SemanticAnalyzer(conf); + } + + ASTNode parse(String query) throws ParseException { + ASTNode nd = pd.parse(query); + return (ASTNode) nd.getChild(0); + } + @Test + public void testDeleteNoWhere() throws ParseException { + ASTNode ast = parse("DELETE FROM src"); + Assert.assertEquals("AST doesn't match", + "(TOK_DELETE_FROM " + + "(TOK_TABNAME src))", ast.toStringTree()); + } + @Test + public void testDeleteWithWhere() throws ParseException { + ASTNode ast = parse("DELETE FROM src WHERE key IS NOT NULL AND src.value < 0"); + Assert.assertEquals("AST doesn't match", + "(TOK_DELETE_FROM " + + "(TOK_TABNAME src) " + + "(TOK_WHERE " + + "(AND " + + "(TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL key)) " + + "(< (. 
(TOK_TABLE_OR_COL src) value) 0))))", + ast.toStringTree()); + } + @Test + public void testUpdateNoWhereSingleSet() throws ParseException { + ASTNode ast = parse("UPDATE src set key = 3"); + Assert.assertEquals("AST doesn't match", + "(TOK_UPDATE_TABLE " + + "(TOK_TABNAME src) " + + "(TOK_SET_COLUMNS_CLAUSE " + + "(= " + + "(TOK_TABLE_OR_COL key) 3)))", + ast.toStringTree()); + } + @Test + public void testUpdateNoWhereMultiSet() throws ParseException { + ASTNode ast = parse("UPDATE src set key = 3, value = 8"); + Assert.assertEquals("AST doesn't match", + "(TOK_UPDATE_TABLE " + + "(TOK_TABNAME src) " + + "(TOK_SET_COLUMNS_CLAUSE " + + "(= " + + "(TOK_TABLE_OR_COL key) 3) " + + "(= " + + "(TOK_TABLE_OR_COL value) 8)))", + ast.toStringTree()); + } + @Test + public void testUpdateWithWhereSingleSet() throws ParseException { + ASTNode ast = parse("UPDATE src SET key = 3 WHERE value IS NULL"); + Assert.assertEquals("AST doesn't match", + "(TOK_UPDATE_TABLE " + + "(TOK_TABNAME src) " + + "(TOK_SET_COLUMNS_CLAUSE " + + "(= " + + "(TOK_TABLE_OR_COL key) 3)) " + + "(TOK_WHERE (TOK_FUNCTION TOK_ISNULL (TOK_TABLE_OR_COL value))))", + ast.toStringTree()); + } + @Test + public void testUpdateWithWhereMultiSet() throws ParseException { + ASTNode ast = parse("UPDATE src SET key = 3, value = 8 WHERE VALUE = 1230997"); + Assert.assertEquals("AST doesn't match", + "(TOK_UPDATE_TABLE " + + "(TOK_TABNAME src) " + + "(TOK_SET_COLUMNS_CLAUSE " + + "(= " + + "(TOK_TABLE_OR_COL key) 3) " + + "(= " + + "(TOK_TABLE_OR_COL value) 8)) " + + "(TOK_WHERE (= (TOK_TABLE_OR_COL VALUE) 1230997)))", + ast.toStringTree()); + } + @Test + public void testStandardInsertIntoTable() throws ParseException { + ASTNode ast = parse("INSERT into TABLE page_view SELECT pvs.viewTime, pvs.userid from page_view_stg pvs where pvs.userid is null"); + Assert.assertEquals("AST doesn't match", + "(TOK_QUERY " + + "(TOK_FROM " + + "(TOK_TABREF (TOK_TABNAME page_view_stg) pvs)) " + + "(TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME page_view))) " + + "(TOK_SELECT " + + "(TOK_SELEXPR (. (TOK_TABLE_OR_COL pvs) viewTime)) " + + "(TOK_SELEXPR (. (TOK_TABLE_OR_COL pvs) userid))) " + + "(TOK_WHERE (TOK_FUNCTION TOK_ISNULL (. 
(TOK_TABLE_OR_COL pvs) userid)))))", + ast.toStringTree()); + } + @Test + public void testSelectStarFromAnonymousVirtTable1Row() throws ParseException { + try { + parse("select * from values (3,4)"); + Assert.assertFalse("Expected ParseException", true); + } + catch(ParseException ex) { + Assert.assertEquals("Failure didn't match.", "line 1:21 missing EOF at '(' near 'values'",ex.getMessage()); + } + } + @Test + public void testSelectStarFromVirtTable1Row() throws ParseException { + ASTNode ast = parse("select * from (values (3,4)) as VC(a,b)"); + Assert.assertEquals("AST doesn't match", + "(TOK_QUERY " + + "(TOK_FROM " + + "(TOK_VIRTUAL_TABLE " + + "(TOK_VIRTUAL_TABREF (TOK_TABNAME VC) (TOK_COL_NAME a b)) " + + "(TOK_VALUES_TABLE (TOK_VALUE_ROW 3 4)))) " + + "(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))", + ast.toStringTree()); + } + @Test + public void testSelectStarFromVirtTable2Row() throws ParseException { + ASTNode ast = parse("select * from (values (1,2),(3,4)) as VC(a,b)"); + Assert.assertEquals("AST doesn't match", + "(TOK_QUERY " + + "(TOK_FROM " + + "(TOK_VIRTUAL_TABLE " + + "(TOK_VIRTUAL_TABREF (TOK_TABNAME VC) (TOK_COL_NAME a b)) " + + "(TOK_VALUES_TABLE (TOK_VALUE_ROW 1 2) (TOK_VALUE_ROW 3 4)))) " + + "(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))", + ast.toStringTree()); + } + @Test + public void testSelectStarFromVirtTable2RowNamedProjections() throws ParseException { + ASTNode ast = parse("select a as c, b as d from (values (1,2),(3,4)) as VC(a,b)"); + Assert.assertEquals("AST doesn't match", + "(TOK_QUERY " + + "(TOK_FROM " + + "(TOK_VIRTUAL_TABLE " + + "(TOK_VIRTUAL_TABREF (TOK_TABNAME VC) (TOK_COL_NAME a b)) " + + "(TOK_VALUES_TABLE (TOK_VALUE_ROW 1 2) (TOK_VALUE_ROW 3 4)))) " + + "(TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) " + + "(TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a) c) (TOK_SELEXPR (TOK_TABLE_OR_COL b) d))))", + ast.toStringTree()); + } + @Test + public void testInsertIntoTableAsSelectFromNamedVirtTable() throws ParseException { + ASTNode ast = parse("insert into table page_view select a,b as c from (values (1,2),(3,4)) as VC(a,b) where b = 9"); + Assert.assertEquals("AST doesn't match", + "(TOK_QUERY " + + "(TOK_FROM " + + "(TOK_VIRTUAL_TABLE " + + "(TOK_VIRTUAL_TABREF (TOK_TABNAME VC) (TOK_COL_NAME a b)) " + + "(TOK_VALUES_TABLE (TOK_VALUE_ROW 1 2) (TOK_VALUE_ROW 3 4)))) " + + "(TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME page_view))) " + + "(TOK_SELECT " + + "(TOK_SELEXPR (TOK_TABLE_OR_COL a)) " + + "(TOK_SELEXPR (TOK_TABLE_OR_COL b) c)) " + + "(TOK_WHERE (= (TOK_TABLE_OR_COL b) 9))))", + ast.toStringTree()); + } + @Test + public void testInsertIntoTableFromAnonymousTable1Row() throws ParseException { + ASTNode ast = parse("insert into table page_view values(1,2)"); + Assert.assertEquals("AST doesn't match", + "(TOK_QUERY " + + "(TOK_FROM " + + "(TOK_VIRTUAL_TABLE " + + "(TOK_VIRTUAL_TABREF TOK_ANONYMOUS) " + + "(TOK_VALUES_TABLE (TOK_VALUE_ROW 1 2)))) " + + "(TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME page_view))) " + + "(TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))", + ast.toStringTree()); + } + @Test + public void testInsertIntoTableFromAnonymousTable() throws ParseException { + ASTNode ast = parse("insert into table page_view values(1,2),(3,4)"); + Assert.assertEquals("AST doesn't match", + "(TOK_QUERY " + + "(TOK_FROM " + + "(TOK_VIRTUAL_TABLE " + + "(TOK_VIRTUAL_TABREF TOK_ANONYMOUS) " + + "(TOK_VALUES_TABLE (TOK_VALUE_ROW 1 2) 
+            "(TOK_VALUES_TABLE (TOK_VALUE_ROW 1 2) (TOK_VALUE_ROW 3 4)))) " +
+        "(TOK_INSERT (TOK_INSERT_INTO (TOK_TAB (TOK_TABNAME page_view))) " +
+          "(TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))",
+      ast.toStringTree());
+  }
+}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java
index b554743..402a4ac 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java
@@ -93,8 +93,8 @@ public void write(DataOutput out) throws IOException {

   @Override
   public void readFields(DataInput in) throws IOException {
-    Schema schema = Schema.parse(in.readUTF());
-    fileSchema = Schema.parse(in.readUTF());
+    Schema schema = AvroSerdeUtils.getSchemaFor(in.readUTF());
+    fileSchema = AvroSerdeUtils.getSchemaFor(in.readUTF());
     recordReaderID = UID.read(in);
     record = new GenericData.Record(schema);
     binaryDecoder = DecoderFactory.defaultFactory().createBinaryDecoder((InputStream) in, binaryDecoder);
diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
index 8c5cf3e..7c48e9b 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
@@ -29,7 +29,9 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.mapred.JobConf;

+import java.io.File;
 import java.io.IOException;
+import java.io.InputStream;
 import java.math.BigInteger;
 import java.net.URI;
 import java.net.URISyntaxException;
@@ -67,7 +69,7 @@ public static Schema determineSchemaOrThrowException(Properties properties)
       throws IOException, AvroSerdeException {
     String schemaString = properties.getProperty(SCHEMA_LITERAL);
     if(schemaString != null && !schemaString.equals(SCHEMA_NONE))
-      return Schema.parse(schemaString);
+      return AvroSerdeUtils.getSchemaFor(schemaString);

     // Try pulling directly from URL
     schemaString = properties.getProperty(SCHEMA_URL);
@@ -78,7 +80,7 @@ public static Schema determineSchemaOrThrowException(Properties properties)
       Schema s = getSchemaFromFS(schemaString, new Configuration());
       if (s == null) {
         //in case schema is not a file system
-        return Schema.parse(new URL(schemaString).openStream());
+        return AvroSerdeUtils.getSchemaFor(new URL(schemaString).openStream());
       }
       return s;
     } catch (IOException ioe) {
@@ -123,7 +125,7 @@ protected static Schema getSchemaFromFS(String schemaFSUrl,
     }
     try {
       in = fs.open(new Path(schemaFSUrl));
-      Schema s = Schema.parse(in);
+      Schema s = AvroSerdeUtils.getSchemaFor(in);
       return s;
     } finally {
       if(in != null) in.close();
@@ -194,4 +196,31 @@ public static HiveDecimal getHiveDecimalFromByteBuffer(ByteBuffer byteBuffer, in
     return dec;
   }

+  public static Schema getSchemaFor(String str) {
+    Schema.Parser parser = new Schema.Parser();
+    Schema schema = parser.parse(str);
+    return schema;
+  }
+
+  public static Schema getSchemaFor(File file) {
+    Schema.Parser parser = new Schema.Parser();
+    Schema schema;
+    try {
+      schema = parser.parse(file);
+    } catch (IOException e) {
+      throw new RuntimeException("Failed to parse Avro schema from " + file.getName(), e);
+    }
+    return schema;
+  }
+
+  public static Schema getSchemaFor(InputStream stream) {
+    Schema.Parser parser = new Schema.Parser();
+    Schema schema;
+    try {
+      schema = parser.parse(stream);
+    } catch (IOException e) {
+      throw new RuntimeException("Failed to parse Avro schema", e);
+    }
+    return schema;
+  }
 }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaResolutionProblem.java serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaResolutionProblem.java
index 3dceb63..65f104d 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaResolutionProblem.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaResolutionProblem.java
@@ -55,5 +55,5 @@
         "  }\n" +
         " ]\n" +
         "}";
-  public final static Schema SIGNAL_BAD_SCHEMA = Schema.parse(sentinelString);
+  public final static Schema SIGNAL_BAD_SCHEMA = AvroSerdeUtils.getSchemaFor(sentinelString);
 }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java
index 497a49c..4169558 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/avro/TypeInfoToSchema.java
@@ -38,7 +38,6 @@
  */
 public class TypeInfoToSchema {

-  private static final Schema.Parser PARSER = new Schema.Parser();
   private long recordCounter = 0;

   /**
@@ -139,7 +138,7 @@ private Schema createAvroPrimitive(TypeInfo typeInfo) {
         DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
         String precision = String.valueOf(decimalTypeInfo.precision());
         String scale = String.valueOf(decimalTypeInfo.scale());
-        schema = PARSER.parse("{" +
+        schema = AvroSerdeUtils.getSchemaFor("{" +
             "\"type\":\"bytes\"," +
             "\"logicalType\":\"decimal\"," +
             "\"precision\":" + precision + "," +
diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
index 198bd24..3a33239 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
@@ -55,7 +55,7 @@ public void canDeserializeVoidType() throws IOException, SerDeException {
         " {\"name\": \"isANull\", \"type\": \"null\"}\n" +
         " ]\n" +
         "}";
-    Schema s = Schema.parse(schemaString);
+    Schema s = AvroSerdeUtils.getSchemaFor(schemaString);
     GenericData.Record record = new GenericData.Record(s);
     record.put("isANull", null);
@@ -83,7 +83,7 @@ public void canDeserializeVoidType() throws IOException, SerDeException {

   @Test
   public void canDeserializeMapsWithPrimitiveKeys() throws SerDeException, IOException {
-    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.MAP_WITH_PRIMITIVE_VALUE_TYPE);
+    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.MAP_WITH_PRIMITIVE_VALUE_TYPE);
     GenericData.Record record = new GenericData.Record(s);

     Map m = new Hashtable();
@@ -129,7 +129,7 @@ public void canDeserializeMapsWithPrimitiveKeys() throws SerDeException, IOException {

   @Test
   public void canDeserializeArrays() throws SerDeException, IOException {
-    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.ARRAY_WITH_PRIMITIVE_ELEMENT_TYPE);
+    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.ARRAY_WITH_PRIMITIVE_ELEMENT_TYPE);
     GenericData.Record record = new GenericData.Record(s);

     List list = new ArrayList();
@@ -187,7 +187,7 @@ public void canDeserializeArrays() throws SerDeException, IOException {

   @Test
   public void canDeserializeRecords() throws SerDeException, IOException {
-    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.RECORD_SCHEMA);
+    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.RECORD_SCHEMA);
     GenericData.Record record = new GenericData.Record(s);
     GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema());
     innerRecord.put("int1", 42);
@@ -246,7 +246,7 @@ private ResultPair(ObjectInspector oi, Object value, Object unionObject) {

   @Test
   public void canDeserializeUnions() throws SerDeException, IOException {
-    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.UNION_SCHEMA);
+    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.UNION_SCHEMA);
     GenericData.Record record = new GenericData.Record(s);

     record.put("aUnion", "this is a string");
@@ -295,7 +295,7 @@ private ResultPair unionTester(Schema s, GenericData.Record record)

   @Test // Enums are one of two types we fudge for Hive. Enums go in, Strings come out.
   public void canDeserializeEnums() throws SerDeException, IOException {
-    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.ENUM_SCHEMA);
+    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.ENUM_SCHEMA);
     GenericData.Record record = new GenericData.Record(s);

     record.put("baddies", new GenericData.EnumSymbol(s.getField("baddies").schema(),"DALEKS"));
@@ -325,7 +325,7 @@ public void canDeserializeEnums() throws SerDeException, IOException {

   @Test // Fixed doesn't exist in Hive. Fixeds go in, lists of bytes go out.
   public void canDeserializeFixed() throws SerDeException, IOException {
-    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.FIXED_SCHEMA);
+    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.FIXED_SCHEMA);
     GenericData.Record record = new GenericData.Record(s);

     byte [] bytes = "ANANCIENTBLUEBOX".getBytes();
@@ -361,7 +361,7 @@ public void canDeserializeFixed() throws SerDeException, IOException {

   @Test
   public void canDeserializeBytes() throws SerDeException, IOException {
-    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.BYTES_SCHEMA);
+    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.BYTES_SCHEMA);
     GenericData.Record record = new GenericData.Record(s);

     byte [] bytes = "ANANCIENTBLUEBOX".getBytes();
@@ -400,7 +400,7 @@ public void canDeserializeBytes() throws SerDeException, IOException {

   @Test
   public void canDeserializeNullableTypes() throws IOException, SerDeException {
-    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.NULLABLE_STRING_SCHEMA);
+    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.NULLABLE_STRING_SCHEMA);
     GenericData.Record record = new GenericData.Record(s);
     record.put("nullableString", "this is a string");
@@ -413,7 +413,7 @@ public void canDeserializeNullableTypes() throws IOException, SerDeException {

   @Test
   public void canDeserializeNullableEnums() throws IOException, SerDeException {
-    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.NULLABLE_ENUM_SCHEMA);
+    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.NULLABLE_ENUM_SCHEMA);
     GenericData.Record record = new GenericData.Record(s);
     record.put("nullableEnum", new GenericData.EnumSymbol(AvroSerdeUtils.getOtherTypeFromNullableType(s.getField("nullableEnum").schema()), "CYBERMEN"));
@@ -426,7 +426,8 @@ public void canDeserializeNullableEnums() throws IOException, SerDeException {

   @Test
   public void canDeserializeMapWithNullablePrimitiveValues() throws SerDeException, IOException {
-    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.MAP_WITH_NULLABLE_PRIMITIVE_VALUE_TYPE_SCHEMA);
+    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator
+      .MAP_WITH_NULLABLE_PRIMITIVE_VALUE_TYPE_SCHEMA);
     GenericData.Record record = new GenericData.Record(s);

     Map m = new HashMap();
@@ -504,7 +505,7 @@ private void verifyNullableType(GenericData.Record record, Schema s, String fieldName)

   @Test
   public void verifyCaching() throws SerDeException, IOException {
-    Schema s = Schema.parse(TestAvroObjectInspectorGenerator.RECORD_SCHEMA);
+    Schema s = AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.RECORD_SCHEMA);
     GenericData.Record record = new GenericData.Record(s);
     GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema());
     innerRecord.put("int1", 42);
@@ -541,7 +542,7 @@ public void verifyCaching() throws SerDeException, IOException {
     assertEquals(0, de.getReEncoderCache().size());

     //Read the record with **different** record reader ID and **evolved** schema
-    Schema evolvedSchema = Schema.parse(s.toString());
+    Schema evolvedSchema = AvroSerdeUtils.getSchemaFor(s.toString());
     evolvedSchema.getField("aRecord").schema().addProp("Testing", "meaningless");
     garw.setRecordReaderID(recordReaderID = new UID()); //New record reader ID
     row =
diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
index 76c1940..337b44e 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
@@ -41,13 +41,13 @@ import org.junit.Test;

 public class TestAvroObjectInspectorGenerator {
-  private final TypeInfo STRING = TypeInfoFactory.getPrimitiveTypeInfo("string");
-  private final TypeInfo INT = TypeInfoFactory.getPrimitiveTypeInfo("int");
-  private final TypeInfo BOOLEAN = TypeInfoFactory.getPrimitiveTypeInfo("boolean");
-  private final TypeInfo LONG = TypeInfoFactory.getPrimitiveTypeInfo("bigint");
-  private final TypeInfo FLOAT = TypeInfoFactory.getPrimitiveTypeInfo("float");
-  private final TypeInfo DOUBLE = TypeInfoFactory.getPrimitiveTypeInfo("double");
-  private final TypeInfo VOID = TypeInfoFactory.getPrimitiveTypeInfo("void");
+  private static final TypeInfo STRING = TypeInfoFactory.getPrimitiveTypeInfo("string");
+  private static final TypeInfo INT = TypeInfoFactory.getPrimitiveTypeInfo("int");
+  private static final TypeInfo BOOLEAN = TypeInfoFactory.getPrimitiveTypeInfo("boolean");
+  private static final TypeInfo LONG = TypeInfoFactory.getPrimitiveTypeInfo("bigint");
+  private static final TypeInfo FLOAT = TypeInfoFactory.getPrimitiveTypeInfo("float");
+  private static final TypeInfo DOUBLE = TypeInfoFactory.getPrimitiveTypeInfo("double");
+  private static final TypeInfo VOID = TypeInfoFactory.getPrimitiveTypeInfo("void");

   // These schemata are used in other tests
   static public final String MAP_WITH_PRIMITIVE_VALUE_TYPE = "{\n" +
@@ -265,7 +265,7 @@ public void failOnNonRecords() throws Exception {
         " \"symbols\" : [\"SPADES\", \"HEARTS\", \"DIAMONDS\", \"CLUBS\"]\n" +
         "}";

-    Schema s = Schema.parse(nonRecordSchema);
+    Schema s = AvroSerdeUtils.getSchemaFor(nonRecordSchema);
     try {
       new AvroObjectInspectorGenerator(s);
       fail("Should not be able to handle non-record Avro types");
@@ -311,7 +311,7 @@ public void primitiveTypesWorkCorrectly() throws SerDeException {
         " }\n" +
         " ]\n" +
         "}";
-    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(Schema.parse(bunchOfPrimitives));
+    AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(AvroSerdeUtils.getSchemaFor(bunchOfPrimitives));
"anInt", "aBoolean", "aLong", "aFloat", "aDouble", "aNull"}; verifyColumnNames(expectedColumnNames, aoig.getColumnNames()); @@ -350,7 +350,7 @@ private void verifyColumnNames(String[] expectedColumnNames, List column @Test public void canHandleMapsWithPrimitiveValueTypes() throws SerDeException { - Schema s = Schema.parse(MAP_WITH_PRIMITIVE_VALUE_TYPE); + Schema s = AvroSerdeUtils.getSchemaFor(MAP_WITH_PRIMITIVE_VALUE_TYPE); AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s); verifyMap(aoig, "aMap"); } @@ -379,7 +379,7 @@ private void verifyMap(final AvroObjectInspectorGenerator aoig, final String fie @Test public void canHandleArrays() throws SerDeException { - Schema s = Schema.parse(ARRAY_WITH_PRIMITIVE_ELEMENT_TYPE); + Schema s = AvroSerdeUtils.getSchemaFor(ARRAY_WITH_PRIMITIVE_ELEMENT_TYPE); AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s); // Column names @@ -398,7 +398,7 @@ public void canHandleArrays() throws SerDeException { @Test public void canHandleRecords() throws SerDeException { - Schema s = Schema.parse(RECORD_SCHEMA); + Schema s = AvroSerdeUtils.getSchemaFor(RECORD_SCHEMA); AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s); // Column names @@ -429,7 +429,7 @@ public void canHandleRecords() throws SerDeException { @Test public void canHandleUnions() throws SerDeException { - Schema s = Schema.parse(UNION_SCHEMA); + Schema s = AvroSerdeUtils.getSchemaFor(UNION_SCHEMA); AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s); // Column names @@ -452,7 +452,7 @@ public void canHandleUnions() throws SerDeException { @Test // Enums are one of two Avro types that Hive doesn't have any native support for. public void canHandleEnums() throws SerDeException { - Schema s = Schema.parse(ENUM_SCHEMA); + Schema s = AvroSerdeUtils.getSchemaFor(ENUM_SCHEMA); AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s); // Column names - we lose the enumness of this schema @@ -466,7 +466,7 @@ public void canHandleEnums() throws SerDeException { @Test // Hive has no concept of Avro's fixed type. Fixed -> arrays of bytes public void canHandleFixed() throws SerDeException { - Schema s = Schema.parse(FIXED_SCHEMA); + Schema s = AvroSerdeUtils.getSchemaFor(FIXED_SCHEMA); AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s); @@ -483,7 +483,7 @@ public void canHandleFixed() throws SerDeException { @Test // Avro considers bytes primitive, Hive doesn't. Make them list of tinyint. public void canHandleBytes() throws SerDeException { - Schema s = Schema.parse(BYTES_SCHEMA); + Schema s = AvroSerdeUtils.getSchemaFor(BYTES_SCHEMA); AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s); @@ -500,7 +500,7 @@ public void canHandleBytes() throws SerDeException { @Test // That Union[T, NULL] is converted to just T. 
public void convertsNullableTypes() throws SerDeException { - Schema s = Schema.parse(NULLABLE_STRING_SCHEMA); + Schema s = AvroSerdeUtils.getSchemaFor(NULLABLE_STRING_SCHEMA); AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s); assertEquals(1, aoig.getColumnNames().size()); @@ -517,14 +517,14 @@ public void convertsNullableTypes() throws SerDeException { @Test // That Union[T, NULL] is converted to just T, within a Map public void convertsMapsWithNullablePrimitiveTypes() throws SerDeException { - Schema s = Schema.parse(MAP_WITH_NULLABLE_PRIMITIVE_VALUE_TYPE_SCHEMA); + Schema s = AvroSerdeUtils.getSchemaFor(MAP_WITH_NULLABLE_PRIMITIVE_VALUE_TYPE_SCHEMA); AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s); verifyMap(aoig, "aMap"); } @Test // That Union[T, NULL] is converted to just T. public void convertsNullableEnum() throws SerDeException { - Schema s = Schema.parse(NULLABLE_ENUM_SCHEMA); + Schema s = AvroSerdeUtils.getSchemaFor(NULLABLE_ENUM_SCHEMA); AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s); assertEquals(1, aoig.getColumnNames().size()); @@ -542,10 +542,10 @@ public void convertsNullableEnum() throws SerDeException { @Test public void objectInspectorsAreCached() throws SerDeException { // Verify that Hive is caching the object inspectors for us. - Schema s = Schema.parse(KITCHEN_SINK_SCHEMA); + Schema s = AvroSerdeUtils.getSchemaFor(KITCHEN_SINK_SCHEMA); AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s); - Schema s2 = Schema.parse(KITCHEN_SINK_SCHEMA); + Schema s2 = AvroSerdeUtils.getSchemaFor(KITCHEN_SINK_SCHEMA); AvroObjectInspectorGenerator aoig2 = new AvroObjectInspectorGenerator(s2); diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java index 072225d..803a987 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java +++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerde.java @@ -61,8 +61,8 @@ " ]\n" + "}"; - static final Schema originalSchema = Schema.parse(originalSchemaString); - static final Schema newSchema = Schema.parse(newSchemaString); + static final Schema originalSchema = AvroSerdeUtils.getSchemaFor(originalSchemaString); + static final Schema newSchema = AvroSerdeUtils.getSchemaFor(newSchemaString); @Test public void initializeDoesNotReuseSchemasFromConf() throws SerDeException { @@ -81,7 +81,7 @@ public void initializeDoesNotReuseSchemasFromConf() throws SerDeException { // Verify that the schema now within the configuration is the one passed // in via the properties - assertEquals(newSchema, Schema.parse(conf.get(AVRO_SERDE_SCHEMA))); + assertEquals(newSchema, AvroSerdeUtils.getSchemaFor(conf.get(AVRO_SERDE_SCHEMA))); } @Test diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java index 67d5570..af236f7 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java +++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerdeUtils.java @@ -58,7 +58,7 @@ "}"; private void testField(String schemaString, String fieldName, boolean shouldBeNullable) { - Schema s = Schema.parse(schemaString); + Schema s = AvroSerdeUtils.getSchemaFor(schemaString); assertEquals(shouldBeNullable, isNullableType(s.getField(fieldName).schema())); } @@ -106,11 +106,11 @@ public void isNullableTypeIdentifiesNonUnionTypes() { @Test public void 
   public void getTypeFromNullableTypePositiveCase() {
-    Schema s = Schema.parse(NULLABLE_UNION);
+    Schema s = AvroSerdeUtils.getSchemaFor(NULLABLE_UNION);
     Schema typeFromNullableType = getOtherTypeFromNullableType(s.getField("mayBeNull").schema());
     assertEquals(Schema.Type.STRING, typeFromNullableType.getType());

-    s = Schema.parse(NULLABLE_UNION2);
+    s = AvroSerdeUtils.getSchemaFor(NULLABLE_UNION2);
     typeFromNullableType = getOtherTypeFromNullableType(s.getField("mayBeNull").schema());
     assertEquals(Schema.Type.STRING, typeFromNullableType.getType());
   }
@@ -126,7 +126,7 @@ public void determineSchemaFindsLiterals() throws Exception {
     String schema = TestAvroObjectInspectorGenerator.RECORD_SCHEMA;
     Properties props = new Properties();
     props.put(AvroSerdeUtils.SCHEMA_LITERAL, schema);
-    Schema expected = Schema.parse(schema);
+    Schema expected = AvroSerdeUtils.getSchemaFor(schema);
     assertEquals(expected, AvroSerdeUtils.determineSchemaOrThrowException(props));
   }
@@ -163,7 +163,7 @@ public void noneOptionWorksForSpecifyingSchemas() throws IOException, AvroSerdeException {
     try {
       s = determineSchemaOrThrowException(props);
       assertNotNull(s);
-      assertEquals(Schema.parse(TestAvroObjectInspectorGenerator.RECORD_SCHEMA), s);
+      assertEquals(AvroSerdeUtils.getSchemaFor(TestAvroObjectInspectorGenerator.RECORD_SCHEMA), s);
     } catch(AvroSerdeException he) {
       fail("Should have parsed schema literal, not thrown exception.");
     }
@@ -197,7 +197,7 @@ public void determineSchemaCanReadSchemaFromHDFS() throws IOException, AvroSerdeException {
       Schema schemaFromHDFS =
           AvroSerdeUtils.getSchemaFromFS(onHDFS, miniDfs.getFileSystem().getConf());
-      Schema expectedSchema = Schema.parse(schemaString);
+      Schema expectedSchema = AvroSerdeUtils.getSchemaFor(schemaString);
       assertEquals(expectedSchema, schemaFromHDFS);
     } finally {
       if(miniDfs != null) miniDfs.shutdown();
diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerializer.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerializer.java
index f8161da..b573f50 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerializer.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroSerializer.java
@@ -53,7 +53,7 @@ private Schema buildSchema(String recordValues) {
         " \"fields\": [" + recordValues +
         " ] }";
-    return Schema.parse(s);
+    return AvroSerdeUtils.getSchemaFor(s);
   }

   /**
@@ -231,7 +231,7 @@ public void canSerializeUnions() throws SerDeException, IOException {
   public void canSerializeEnums() throws SerDeException, IOException {
     String type = "{\"type\": \"enum\", \"name\": \"enum1_values\", " +
         "\"symbols\":[\"BLUE\",\"RED\",\"GREEN\"]}";
-    Schema schema = Schema.parse(type);
+    Schema schema = AvroSerdeUtils.getSchemaFor(type);
     String field = "{ \"name\":\"enum1\", \"type\": " + schema + " }";
     for(enum1 e : enum1.values()) {
       GenericEnumSymbol symbol = new GenericData.EnumSymbol(schema, e.toString());
@@ -247,7 +247,7 @@ public void canSerializeNullableEnums() throws SerDeException, IOException {
     String type = "{\"type\": \"enum\", \"name\": \"enum1_values\",\n" +
         " \"namespace\": \"org.apache.hadoop.hive\",\n" +
         " \"symbols\":[\"BLUE\",\"RED\",\"GREEN\"]}";
-    Schema schema = Schema.parse(type);
+    Schema schema = AvroSerdeUtils.getSchemaFor(type);
     String field = "{ \"name\":\"nullableenum\", \"type\": [\"null\", " + schema + "] }";
     GenericEnumSymbol symbol = new GenericData.EnumSymbol(schema, enum1.BLUE.toString());
     GenericRecord r = serializeAndDeserialize(field, "nullableenum", symbol);
diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestGenericAvroRecordWritable.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestGenericAvroRecordWritable.java
index cf3b16c..fb13b47 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestGenericAvroRecordWritable.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestGenericAvroRecordWritable.java
@@ -50,7 +50,7 @@
   @Test
   public void writableContractIsImplementedCorrectly() throws IOException {
-    Schema schema = Schema.parse(schemaJSON);
+    Schema schema = AvroSerdeUtils.getSchemaFor(schemaJSON);

     GenericRecord gr = new GenericData.Record(schema);
     gr.put("first", "The");
diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestSchemaReEncoder.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestSchemaReEncoder.java
index 8dd6109..922ca7c 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestSchemaReEncoder.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestSchemaReEncoder.java
@@ -27,6 +27,7 @@ import org.junit.Test;

 public class TestSchemaReEncoder {
+
   @Test
   public void schemasCanAddFields() throws SerDeException {
     String original = "{\n" +
@@ -56,8 +57,8 @@ public void schemasCanAddFields() throws SerDeException {
         " }\n" +
         " ]\n" +
         "}";
-    Schema originalSchema = Schema.parse(original);
-    Schema evolvedSchema = Schema.parse(evolved);
+    Schema originalSchema = AvroSerdeUtils.getSchemaFor(original);
+    Schema evolvedSchema = AvroSerdeUtils.getSchemaFor(evolved);

     GenericRecord record = new GenericData.Record(originalSchema);
     record.put("text", "it is a far better thing I do, yadda, yadda");
@@ -97,8 +98,8 @@ public void schemasCanAddFields() throws SerDeException {
         " }\n" +
         " ]\n" +
         "}";
-    Schema originalSchema2 = Schema.parse(original2);
-    Schema evolvedSchema2 = Schema.parse(evolved2);
+    Schema originalSchema2 = AvroSerdeUtils.getSchemaFor(original2);
+    Schema evolvedSchema2 = AvroSerdeUtils.getSchemaFor(evolved2);

     record = new GenericData.Record(originalSchema2);
     record.put("a", 19);
diff --git serde/src/test/org/apache/hadoop/hive/serde2/avro/TestThatEvolvedSchemasActAsWeWant.java serde/src/test/org/apache/hadoop/hive/serde2/avro/TestThatEvolvedSchemasActAsWeWant.java
index 4b8cc98..70613d8 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/avro/TestThatEvolvedSchemasActAsWeWant.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/avro/TestThatEvolvedSchemasActAsWeWant.java
@@ -34,6 +34,7 @@ import static org.junit.Assert.assertTrue;

 public class TestThatEvolvedSchemasActAsWeWant {
+
   @Test
   public void resolvedSchemasShouldReturnReaderSchema() throws IOException {
     // Need to verify that when reading a datum with an updated reader schema
@@ -68,7 +69,7 @@ public void resolvedSchemasShouldReturnReaderSchema() throws IOException {
         " ]\n" +
         "}";

-    Schema[] schemas = {Schema.parse(v0), Schema.parse(v1)};
+    Schema[] schemas = {AvroSerdeUtils.getSchemaFor(v0), AvroSerdeUtils.getSchemaFor(v1)};

     // Encode a schema with v0, write out.
     GenericRecord record = new GenericData.Record(schemas[0]);
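
Reviewer note (not part of the patch): the recurring Avro change above replaces the long-deprecated Schema.parse(String) with the new AvroSerdeUtils.getSchemaFor(...) helpers, each of which builds a fresh org.apache.avro.Schema.Parser. One likely reason a fresh parser per call matters (the patch itself doesn't say, and the static PARSER field removed from TypeInfoToSchema parses only an unnamed "bytes" type): a Schema.Parser instance remembers every named type it has parsed and throws if the same name is parsed again. A minimal standalone sketch of that behavior; the class name and schema literal below are illustrative, not from the patch:

import org.apache.avro.Schema;
import org.apache.avro.SchemaParseException;

public class SchemaParserScratch {
  // A named record type; parsing it twice through one parser redefines "r".
  private static final String RECORD =
      "{\"type\":\"record\",\"name\":\"r\"," +
      "\"fields\":[{\"name\":\"f\",\"type\":\"int\"}]}";

  public static void main(String[] args) {
    // Fresh parser per call, as in the new AvroSerdeUtils.getSchemaFor(String):
    Schema a = new Schema.Parser().parse(RECORD);
    Schema b = new Schema.Parser().parse(RECORD); // fine: independent parsers
    System.out.println(a.equals(b));              // true

    // A single shared parser rejects re-parsing the same named type:
    Schema.Parser shared = new Schema.Parser();
    shared.parse(RECORD);
    try {
      shared.parse(RECORD); // second parse of record "r" through the same parser
    } catch (SchemaParseException e) {
      System.out.println("shared parser refused: " + e.getMessage());
    }
  }
}

Under that assumption, per-call parsers make getSchemaFor safe for repeated use on the same schema string (e.g. re-reading AVRO_SERDE_SCHEMA from the JobConf), at the cost of re-parsing instead of reusing a cached parser.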