diff --git build.properties build.properties
index 4d9eed8..8aa78c9 100644
--- build.properties
+++ build.properties
@@ -79,7 +79,7 @@ common.jar=${hadoop.root}/lib/commons-httpclient-3.0.1.jar
 # (measured in milliseconds). Ignored if fork is disabled. When running
 # multiple tests inside the same Java VM (see forkMode), timeout
 # applies to the time that all tests use together, not to an individual test.
-test.junit.timeout=43200000
+test.junit.timeout=364000000
 
 # Use this property to selectively disable tests from the command line:
 # ant test -Dtest.junit.exclude="**/TestCliDriver.class"
diff --git data/files/datatypes.txt data/files/datatypes.txt
index 87e0573..66ef826 100644
--- data/files/datatypes.txt
+++ data/files/datatypes.txt
@@ -1,3 +1,3 @@
-\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N
--1false-1.1\N\N\N-1-1-1.0-1\N\N\N
-1true1.11121x2ykva92.2111.01abcd1111213142212212x1abcd22012-04-22 09:00:00.123456789
+\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N\N
+-1false-1.1\N\N\N-1-1-1.0-1\N\N\N\N
+1true1.11121x2ykva92.2111.01abcd1111213142212212x1abcd22012-04-22 09:00:00.123456789123456789.0123456
diff --git data/files/kv7.txt data/files/kv7.txt
new file mode 100644
index 0000000..9d995f8
--- /dev/null
+++ data/files/kv7.txt
@@ -0,0 +1,35 @@
+1E+99 0
+1E-99 0
+0 0
+100 100
+10 10
+1 1
+0.1 0
+0.01 0
+200 200
+20 20
+2 2
+0 0
+0.2 0
+0.02 0
+0.3 0
+0.33 0
+0.333 0
+-0.3 0
+-0.33 0
+-0.333 0
+1.0 1
+2 2
+3.14 3
+-1.12 -1
+-1.12 -1
+-1.122 -11
+1.12 1
+1.122 1
+124.00 124
+125.2 125
+-1255.49 -1255
+3.14 3
+3.14 3
+-1234567890.1234567890 -1234567890
+1234567890.1234567800 1234567890
diff --git jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveBaseResultSet.java jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveBaseResultSet.java
index 3bfff92..bebcbda 100644
--- jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveBaseResultSet.java
+++ jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveBaseResultSet.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.jdbc;
 import java.io.InputStream;
 import java.io.Reader;
 import java.math.BigDecimal;
+import java.math.MathContext;
 import java.net.URL;
 import java.sql.Array;
 import java.sql.Blob;
@@ -102,19 +103,27 @@ public abstract class HiveBaseResultSet implements ResultSet {
   }
 
   public BigDecimal getBigDecimal(int columnIndex) throws SQLException {
-    throw new SQLException("Method not supported");
+    Object obj = getObject(columnIndex);
+    if (obj == null) {
+      return null;
+    }
+    if (obj instanceof BigDecimal) {
+      return ((BigDecimal) obj);
+    }
+    throw new SQLException("Cannot convert column " + columnIndex + " to BigDecimal");
   }
 
   public BigDecimal getBigDecimal(String columnName) throws SQLException {
-    throw new SQLException("Method not supported");
+    return getBigDecimal(findColumn(columnName));
   }
 
   public BigDecimal getBigDecimal(int columnIndex, int scale) throws SQLException {
-    throw new SQLException("Method not supported");
+    BigDecimal bd = getBigDecimal(columnIndex);
+    // Guard against NULL columns before rounding.
+    return (bd == null) ? null : bd.round(new MathContext(scale));
   }
 
   public BigDecimal getBigDecimal(String columnName, int scale) throws SQLException {
-    throw new SQLException("Method not supported");
+    return getBigDecimal(findColumn(columnName), scale);
   }
 
   public InputStream getBinaryStream(int columnIndex) throws SQLException {
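With the getBigDecimal overloads above, a JDBC client can read a Hive DECIMAL column directly instead of hitting "Method not supported". A minimal usage sketch, assuming a HiveServer at localhost:10000 and a table datatypes_table with a decimal column c18 (both names are illustrative, not part of the patch):

    import java.math.BigDecimal;
    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class DecimalReadSketch {
      public static void main(String[] args) throws Exception {
        Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
        Connection con = DriverManager.getConnection(
            "jdbc:hive://localhost:10000/default", "", "");
        Statement stmt = con.createStatement();
        ResultSet res = stmt.executeQuery("select c18 from datatypes_table");
        while (res.next()) {
          // With the patch this returns the value as a BigDecimal (or null
          // for SQL NULL); non-decimal columns still raise SQLException.
          BigDecimal d = res.getBigDecimal(1);
          System.out.println(d);
        }
        con.close();
      }
    }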
diff --git jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveResultSetMetaData.java jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveResultSetMetaData.java
index 0121a66..48b2e86 100644
--- jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveResultSetMetaData.java
+++ jdbc/src/java/org/apache/hadoop/hive/jdbc/HiveResultSetMetaData.java
@@ -112,6 +112,8 @@ public class HiveResultSetMetaData implements java.sql.ResultSetMetaData {
       return serdeConstants.BIGINT_TYPE_NAME;
     } else if ("timestamp".equalsIgnoreCase(type)) {
       return serdeConstants.TIMESTAMP_TYPE_NAME;
+    } else if ("decimal".equalsIgnoreCase(type)) {
+      return serdeConstants.DECIMAL_TYPE_NAME;
     } else if (type.startsWith("map<")) {
       return serdeConstants.STRING_TYPE_NAME;
     } else if (type.startsWith("array<")) {
diff --git jdbc/src/java/org/apache/hadoop/hive/jdbc/JdbcColumn.java jdbc/src/java/org/apache/hadoop/hive/jdbc/JdbcColumn.java
index c33b346..920a4ba 100644
--- jdbc/src/java/org/apache/hadoop/hive/jdbc/JdbcColumn.java
+++ jdbc/src/java/org/apache/hadoop/hive/jdbc/JdbcColumn.java
@@ -82,6 +82,8 @@ public class JdbcColumn {
     // see http://download.oracle.com/javase/6/docs/api/constant-values.html#java.lang.Double.MAX_EXPONENT
     case Types.DOUBLE:
       return 25; // e.g. -(17#).e-####
+    case Types.DECIMAL:
+      return Integer.MAX_VALUE;
     default:
       throw new SQLException("Invalid column type: " + columnType);
     }
@@ -108,6 +110,8 @@
       return 15;
     case Types.TIMESTAMP:
       return 29;
+    case Types.DECIMAL:
+      return Integer.MAX_VALUE;
     default:
       throw new SQLException("Invalid column type: " + columnType);
     }
@@ -129,6 +133,8 @@
       return 15;
     case Types.TIMESTAMP:
       return 9;
+    case Types.DECIMAL:
+      return Integer.MAX_VALUE;
     default:
       throw new SQLException("Invalid column type: " + columnType);
     }
@@ -153,6 +159,8 @@
       return 10;
     } else if (type.equalsIgnoreCase("bigint")) {
       return 10;
+    } else if (type.equalsIgnoreCase("decimal")) {
+      return 10;
     } else if (type.equalsIgnoreCase("float")) {
       return 2;
     } else if (type.equalsIgnoreCase("double")) {
diff --git jdbc/src/java/org/apache/hadoop/hive/jdbc/Utils.java jdbc/src/java/org/apache/hadoop/hive/jdbc/Utils.java
index 595a1ed..c93d00b 100644
--- jdbc/src/java/org/apache/hadoop/hive/jdbc/Utils.java
+++ jdbc/src/java/org/apache/hadoop/hive/jdbc/Utils.java
@@ -48,6 +48,8 @@ public class Utils {
       return Types.BIGINT;
     } else if ("timestamp".equalsIgnoreCase(type)) {
       return Types.TIMESTAMP;
+    } else if ("decimal".equalsIgnoreCase(type)) {
+      return Types.DECIMAL;
     } else if (type.startsWith("map<")) {
       return Types.VARCHAR;
     } else if (type.startsWith("array<")) {
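On the metadata side, "decimal" now surfaces through java.sql.Types.DECIMAL, and JdbcColumn reports display size, precision, and scale as Integer.MAX_VALUE, since this first cut of the type carries no user-declared precision or scale. A sketch of what a client observes, reusing the illustrative table and column from the previous example:

    import java.sql.Connection;
    import java.sql.ResultSet;
    import java.sql.ResultSetMetaData;
    import java.sql.SQLException;
    import java.sql.Statement;
    import java.sql.Types;

    public class DecimalMetaSketch {
      // Assumes con is an open connection to a Hive instance that has the
      // hypothetical datatypes_table with decimal column c18.
      static void describe(Connection con) throws SQLException {
        Statement stmt = con.createStatement();
        ResultSet res = stmt.executeQuery(
            "select c18 from datatypes_table limit 1");
        ResultSetMetaData meta = res.getMetaData();
        System.out.println(meta.getColumnType(1) == Types.DECIMAL); // true
        System.out.println(meta.getColumnTypeName(1));              // decimal
        // All three are Integer.MAX_VALUE: the type is effectively unbounded.
        System.out.println(meta.getColumnDisplaySize(1));
        System.out.println(meta.getPrecision(1));
        System.out.println(meta.getScale(1));
      }
    }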
diff --git jdbc/src/test/org/apache/hadoop/hive/jdbc/TestJdbcDriver.java jdbc/src/test/org/apache/hadoop/hive/jdbc/TestJdbcDriver.java
index 1c59cb3..639418f 100644
--- jdbc/src/test/org/apache/hadoop/hive/jdbc/TestJdbcDriver.java
+++ jdbc/src/test/org/apache/hadoop/hive/jdbc/TestJdbcDriver.java
@@ -148,7 +148,8 @@ public class TestJdbcDriver extends TestCase {
         + " c14 map<integer, map<integer,integer>>,"
         + " c15 struct<r:int,s:struct<a:int,b:string>>,"
         + " c16 array<struct<m:map<string,string>,n:int>>,"
-        + " c17 timestamp) comment '"+dataTypeTableComment
+        + " c17 timestamp,"
+        + " c18 decimal) comment '"+dataTypeTableComment
         +"' partitioned by (dt STRING)");
     assertFalse(res.next());
@@ -381,6 +382,7 @@
     assertEquals("[]", res.getString(16));
     assertEquals(null, res.getString(17));
     assertEquals(null, res.getTimestamp(17));
+    assertEquals(null, res.getBigDecimal(18));
 
     // row 3
     assertTrue(res.next());
@@ -402,6 +404,7 @@
     assertEquals("[[{}, 1], [{c=d, a=b}, 2]]", res.getString(16));
     assertEquals("2012-04-22 09:00:00.123456789", res.getString(17));
     assertEquals("2012-04-22 09:00:00.123456789", res.getTimestamp(17).toString());
+    assertEquals("123456789.0123456", res.getBigDecimal(18).toString());
 
     // test getBoolean rules on non-boolean columns
     assertEquals(true, res.getBoolean(1));
@@ -803,13 +806,13 @@
     ResultSet res = stmt.executeQuery(
         "select c1, c2, c3, c4, c5 as a, c6, c7, c8, c9, c10, c11, c12, " +
-        "c1*2, sentences(null, null, null) as b, c17 from " + dataTypeTableName + " limit 1");
+        "c1*2, sentences(null, null, null) as b, c17, c18 from " + dataTypeTableName + " limit 1");
 
     ResultSetMetaData meta = res.getMetaData();
     ResultSet colRS = con.getMetaData().getColumns(null, null, dataTypeTableName.toLowerCase(), null);
 
-    assertEquals(15, meta.getColumnCount());
+    assertEquals(16, meta.getColumnCount());
 
     assertTrue(colRS.next());
@@ -1012,6 +1015,13 @@
     assertEquals(29, meta.getPrecision(15));
     assertEquals(9, meta.getScale(15));
 
+    assertEquals("c18", meta.getColumnName(16));
+    assertEquals(Types.DECIMAL, meta.getColumnType(16));
+    assertEquals("decimal", meta.getColumnTypeName(16));
+    assertEquals(Integer.MAX_VALUE, meta.getColumnDisplaySize(16));
+    assertEquals(Integer.MAX_VALUE, meta.getPrecision(16));
+    assertEquals(Integer.MAX_VALUE, meta.getScale(16));
+
     for (int i = 1; i <= meta.getColumnCount(); i++) {
       assertFalse(meta.isAutoIncrement(i));
       assertFalse(meta.isCurrency(i));
diff --git metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index 3d89e4c..bc365cb 100644
--- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -395,7 +395,7 @@ public class MetaStoreUtils {
         org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME, "string");
     typeToThriftTypeMap.put(
         org.apache.hadoop.hive.serde.serdeConstants.BINARY_TYPE_NAME, "binary");
-    // These 3 types are not supported yet.
+    // These 4 types are not supported yet.
     // We should define a complex type date in thrift that contains a single int
     // member, and DynamicSerDe
     // should convert it to date type at runtime.
@@ -406,6 +406,8 @@
     typeToThriftTypeMap
         .put(org.apache.hadoop.hive.serde.serdeConstants.TIMESTAMP_TYPE_NAME,
             "timestamp");
+    typeToThriftTypeMap.put(
+        org.apache.hadoop.hive.serde.serdeConstants.DECIMAL_TYPE_NAME, "decimal");
   }
 
   /**
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 31a2fe1..536a34f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -200,6 +200,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStringToMap;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTimestamp;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToBinary;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToDecimal;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFToUtcTimestamp;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFTranslate;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnion;
@@ -403,6 +404,8 @@
         GenericUDFTimestamp.class);
     registerGenericUDF(serdeConstants.BINARY_TYPE_NAME,
         GenericUDFToBinary.class);
+    registerGenericUDF(serdeConstants.DECIMAL_TYPE_NAME,
+        GenericUDFToDecimal.class);
 
     // Aggregate functions
     registerGenericUDAF("max", new GenericUDAFMax());
@@ -636,7 +639,8 @@
     registerNumericType(serdeConstants.BIGINT_TYPE_NAME, 4);
     registerNumericType(serdeConstants.FLOAT_TYPE_NAME, 5);
     registerNumericType(serdeConstants.DOUBLE_TYPE_NAME, 6);
-    registerNumericType(serdeConstants.STRING_TYPE_NAME, 7);
+    registerNumericType(serdeConstants.DECIMAL_TYPE_NAME, 7);
+    registerNumericType(serdeConstants.STRING_TYPE_NAME, 8);
   }
 
   /**
@@ -705,27 +709,23 @@
     return (ai > bi) ? a : b;
   }
 
-  /**
-   * Returns whether it is possible to implicitly convert an object of Class
-   * from to Class to.
-   */
-  public static boolean implicitConvertable(TypeInfo from, TypeInfo to) {
+  /**
+   * Returns the cost of implicitly converting from one type to another, or -1
+   * if no implicit conversion is possible. Lower costs are closer matches.
+   */
+  public static int implicitConvertableCost(TypeInfo from, TypeInfo to) {
     if (from.equals(to)) {
-      return true;
+      return 1;
     }
     // Allow implicit String to Double conversion
     if (from.equals(TypeInfoFactory.stringTypeInfo)
         && to.equals(TypeInfoFactory.doubleTypeInfo)) {
-      return true;
+      return 1;
     }
     // Void can be converted to any type
     if (from.equals(TypeInfoFactory.voidTypeInfo)) {
-      return true;
+      return 1;
     }
 
     if (from.equals(TypeInfoFactory.timestampTypeInfo)
         && to.equals(TypeInfoFactory.stringTypeInfo)) {
-      return true;
+      return 1;
     }
 
     // Allow implicit conversion from Byte -> Integer -> Long -> Float -> Double
@@ -733,12 +733,24 @@
     Integer f = numericTypes.get(from);
     Integer t = numericTypes.get(to);
     if (f == null || t == null) {
-      return false;
+      return -1;
     }
     if (f.intValue() > t.intValue()) {
-      return false;
+      return -1;
     }
-    return true;
+    // Cost shrinks as the target climbs the numeric ladder, so the widest
+    // applicable type (the one closest to string) is the cheapest match.
+    return (
+        numericTypes.get(TypeInfoFactory.stringTypeInfo).intValue() - t.intValue());
+  }
+
+  /**
+   * Returns whether it is possible to implicitly convert an object of Class
+   * from to Class to.
+   */
+  public static boolean implicitConvertable(TypeInfo from, TypeInfo to) {
+    return implicitConvertableCost(from, to) >= 0;
   }
@@ -898,11 +914,11 @@
    * (usually 0 for no conversion and 1 for conversion).
    */
   public static int matchCost(TypeInfo argumentPassed,
-      TypeInfo argumentAccepted, boolean exact) {
+      TypeInfo argumentAccepted, boolean exact, boolean expanded) {
     if (argumentAccepted.equals(argumentPassed)) {
       // matches
       return 0;
     }
     if (argumentPassed.equals(TypeInfoFactory.voidTypeInfo)) {
       // passing null matches everything
       return 0;
@@ -914,7 +932,7 @@
           .getListElementTypeInfo();
       TypeInfo argumentAcceptedElement = ((ListTypeInfo) argumentAccepted)
           .getListElementTypeInfo();
-      return matchCost(argumentPassedElement, argumentAcceptedElement, exact);
+      return matchCost(argumentPassedElement, argumentAcceptedElement, exact, expanded);
     }
     if (argumentPassed.getCategory().equals(Category.MAP)
         && argumentAccepted.getCategory().equals(Category.MAP)) {
@@ -927,8 +945,8 @@
           .getMapValueTypeInfo();
       TypeInfo argumentAcceptedValue = ((MapTypeInfo) argumentAccepted)
           .getMapValueTypeInfo();
-      int cost1 = matchCost(argumentPassedKey, argumentAcceptedKey, exact);
-      int cost2 = matchCost(argumentPassedValue, argumentAcceptedValue, exact);
+      int cost1 = matchCost(argumentPassedKey, argumentAcceptedKey, exact, expanded);
+      int cost2 = matchCost(argumentPassedValue, argumentAcceptedValue, exact, expanded);
       if (cost1 < 0 || cost2 < 0) {
         return -1;
       }
@@ -940,13 +959,17 @@
       // but there is a conversion cost.
       return 1;
     }
-    if (!exact && implicitConvertable(argumentPassed, argumentAccepted)) {
+    if (!exact && !expanded && implicitConvertable(argumentPassed, argumentAccepted)) {
       return 1;
+    } else if (!exact && expanded) {
+      // Expanded matching: use the graded conversion cost so that competing
+      // overloads can be ranked instead of treated as equally good.
+      return implicitConvertableCost(argumentPassed, argumentAccepted);
     }
     return -1;
   }
 
   /**
    * Gets the closest matching method corresponding to the argument list from a
    * list of methods.
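The re-ranking above (decimal slots in at 7, pushing string to 8) is what implicitConvertableCost keys off: a widening numeric conversion is legal, and its cost is the distance between the target type and string, so wider targets are cheaper. A standalone sketch of that scheme, not the Hive code itself:

    import java.util.HashMap;
    import java.util.Map;

    // Illustration of the cost scheme: types are ranked as registered above,
    // only widening conversions are implicit, and a conversion gets cheaper
    // the closer its target sits to the top of the ladder (string).
    public class ConversionCostSketch {
      private static final Map<String, Integer> RANK = new HashMap<String, Integer>();
      static {
        String[] ladder = {"tinyint", "smallint", "int", "bigint",
            "float", "double", "decimal", "string"};
        for (int i = 0; i < ladder.length; i++) {
          RANK.put(ladder[i], i + 1);
        }
      }

      // Mirrors implicitConvertableCost for the numeric ladder:
      // -1 means not implicitly convertible; otherwise lower is better.
      static int cost(String from, String to) {
        if (from.equals(to)) {
          return 1;
        }
        Integer f = RANK.get(from);
        Integer t = RANK.get(to);
        if (f == null || t == null || f.intValue() > t.intValue()) {
          return -1; // narrowing is never implicit
        }
        return RANK.get("string").intValue() - t.intValue();
      }

      public static void main(String[] args) {
        System.out.println(cost("int", "double"));  // 2
        System.out.println(cost("int", "decimal")); // 1: decimal is preferred
        System.out.println(cost("double", "int"));  // -1: narrowing
      }
    }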
@@ -966,6 +1015,7 @@
     List<Method> udfMethods = new ArrayList<Method>();
     // The cost of the result
     int leastConversionCost = Integer.MAX_VALUE;
+    boolean expanded = false;
 
     for (Method m : mlist) {
       List<TypeInfo> argumentsAccepted = TypeInfoUtils.getParameterTypeInfos(m,
@@ -980,7 +1030,7 @@
       for (int i = 0; i < argumentsPassed.size() && match; i++) {
         int cost = matchCost(argumentsPassed.get(i), argumentsAccepted.get(i),
-            exact);
+            exact, expanded);
         if (cost == -1) {
           match = false;
         } else {
@@ -1019,7 +1069,34 @@
     }
     if (udfMethods.size() > 1) {
-      // Ambiguous method found
-      throw new AmbiguousMethodException(udfClass, argumentsPassed, mlist);
+      // Ambiguous methods found: retry the match with expanded, cost-based
+      // conversion and pick the overload with the lowest total cost.
+      Method lowestCostMethod = null;
+      // Track the cheapest candidate across all ambiguous methods.
+      int lowestCost = Integer.MAX_VALUE;
+      expanded = true;
+      for (Method m : udfMethods) {
+        int conversionCost = 0;
+        boolean match = true;
+        List<TypeInfo> argumentsAccepted =
+            TypeInfoUtils.getParameterTypeInfos(m, argumentsPassed.size());
+        for (int i = 0; i < argumentsPassed.size() && match; i++) {
+          int cost = matchCost(argumentsPassed.get(i), argumentsAccepted.get(i),
+              exact, expanded);
+          if (cost == -1) {
+            match = false;
+          } else {
+            conversionCost += cost;
+          }
+        }
+        if (match && conversionCost < lowestCost) {
+          lowestCost = conversionCost;
+          lowestCostMethod = m;
+        }
+      }
+      if (lowestCostMethod == null) {
+        // Still ambiguous: no candidate survived expanded matching.
+        throw new AmbiguousMethodException(udfClass, argumentsPassed, mlist);
+      }
+      return lowestCostMethod;
     }
     return udfMethods.get(0);
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
index 60c207b..8a19a26 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
@@ -147,6 +147,7 @@ public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
     TokenToTypeName.put(HiveParser.TOK_DATE, serdeConstants.DATE_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_DATETIME, serdeConstants.DATETIME_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_TIMESTAMP, serdeConstants.TIMESTAMP_TYPE_NAME);
+    TokenToTypeName.put(HiveParser.TOK_DECIMAL, serdeConstants.DECIMAL_TYPE_NAME);
   }
 
   public static String getTypeName(int token) throws SemanticException {
diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
index 745a185..4364b09 100644
--- ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
@@ -108,6 +108,7 @@ TOK_DATETIME;
 TOK_TIMESTAMP;
 TOK_STRING;
 TOK_BINARY;
+TOK_DECIMAL;
 TOK_LIST;
 TOK_STRUCT;
 TOK_MAP;
@@ -1470,6 +1471,7 @@ primitiveType
     | KW_TIMESTAMP -> TOK_TIMESTAMP
     | KW_STRING -> TOK_STRING
     | KW_BINARY -> TOK_BINARY
+    | KW_DECIMAL -> TOK_DECIMAL
     ;
 
 listType
@@ -2438,6 +2440,7 @@ KW_DOUBLE: 'DOUBLE';
 KW_DATE: 'DATE';
 KW_DATETIME: 'DATETIME';
 KW_TIMESTAMP: 'TIMESTAMP';
+KW_DECIMAL: 'DECIMAL';
 KW_STRING: 'STRING';
 KW_ARRAY: 'ARRAY';
 KW_STRUCT: 'STRUCT';
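Taken together, the new grammar token (KW_DECIMAL/TOK_DECIMAL), the DDL token-to-type mapping, and the registered GenericUDFToDecimal cast make DECIMAL usable end to end. A minimal sketch, again assuming a HiveServer at localhost:10000 and a working directory containing the new data/files/kv7.txt; the table name dec_test is illustrative:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class DecimalEndToEndSketch {
      public static void main(String[] args) throws Exception {
        Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
        Connection con = DriverManager.getConnection(
            "jdbc:hive://localhost:10000/default", "", "");
        Statement stmt = con.createStatement();
        // DECIMAL is now accepted by the parser in column definitions.
        stmt.execute("create table dec_test (key decimal, value int)");
        stmt.execute("load data local inpath 'data/files/kv7.txt' "
            + "overwrite into table dec_test");
        // CAST(... AS DECIMAL) resolves to the registered GenericUDFToDecimal.
        ResultSet res = stmt.executeQuery(
            "select key, cast(value as decimal) from dec_test");
        while (res.next()) {
          System.out.println(res.getBigDecimal(1) + "\t" + res.getBigDecimal(2));
        }
        con.close();
      }
    }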
fromSource uniqueJoinExpr + ; + +uniqueJoinExpr +@init { msgs.push("unique join expression list"); } +@after { msgs.pop(); } + : LPAREN e1+=expression (COMMA e1+=expression)* RPAREN + -> ^(TOK_EXPLIST $e1*) + ; + +uniqueJoinToken +@init { msgs.push("unique join"); } +@after { msgs.pop(); } + : KW_UNIQUEJOIN -> TOK_UNIQUEJOIN; + +joinToken +@init { msgs.push("join type specifier"); } +@after { msgs.pop(); } + : + KW_JOIN -> TOK_JOIN + | kwInner KW_JOIN -> TOK_JOIN + | KW_CROSS KW_JOIN -> TOK_CROSSJOIN + | KW_LEFT KW_OUTER KW_JOIN -> TOK_LEFTOUTERJOIN + | KW_RIGHT KW_OUTER KW_JOIN -> TOK_RIGHTOUTERJOIN + | KW_FULL KW_OUTER KW_JOIN -> TOK_FULLOUTERJOIN + | KW_LEFT KW_SEMI KW_JOIN -> TOK_LEFTSEMIJOIN + ; + +lateralView +@init {msgs.push("lateral view"); } +@after {msgs.pop(); } + : + KW_LATERAL KW_VIEW function tableAlias KW_AS Identifier (COMMA Identifier)* -> ^(TOK_LATERAL_VIEW ^(TOK_SELECT ^(TOK_SELEXPR function Identifier+ tableAlias))) + ; + +tableAlias +@init {msgs.push("table alias"); } +@after {msgs.pop(); } + : + Identifier -> ^(TOK_TABALIAS Identifier) + ; + +fromSource +@init { msgs.push("from source"); } +@after { msgs.pop(); } + : + (tableSource | subQuerySource) (lateralView^)* + ; + +tableBucketSample +@init { msgs.push("table bucket sample specification"); } +@after { msgs.pop(); } + : + KW_TABLESAMPLE LPAREN KW_BUCKET (numerator=Number) KW_OUT KW_OF (denominator=Number) (KW_ON expr+=expression (COMMA expr+=expression)*)? RPAREN -> ^(TOK_TABLEBUCKETSAMPLE $numerator $denominator $expr*) + ; + +splitSample +@init { msgs.push("table split sample specification"); } +@after { msgs.pop(); } + : + KW_TABLESAMPLE LPAREN (numerator=Number) (percent=KW_PERCENT|KW_ROWS) RPAREN + -> {percent != null}? ^(TOK_TABLESPLITSAMPLE TOK_PERCENT $numerator) + -> ^(TOK_TABLESPLITSAMPLE TOK_ROWCOUNT $numerator) + | + KW_TABLESAMPLE LPAREN (numerator=ByteLengthLiteral) RPAREN + -> ^(TOK_TABLESPLITSAMPLE TOK_LENGTH $numerator) + ; + +tableSample +@init { msgs.push("table sample specification"); } +@after { msgs.pop(); } + : + tableBucketSample | + splitSample + ; + +tableSource +@init { msgs.push("table source"); } +@after { msgs.pop(); } + : tabname=tableName (ts=tableSample)? (alias=Identifier)? + -> ^(TOK_TABREF $tabname $ts? $alias?) + ; + +tableName +@init { msgs.push("table name"); } +@after { msgs.pop(); } + : + db=Identifier DOT tab=Identifier + -> ^(TOK_TABNAME $db $tab) + | + tab=Identifier + -> ^(TOK_TABNAME $tab) + ; + +viewName +@init { msgs.push("view name"); } +@after { msgs.pop(); } + : + (db=Identifier DOT)? view=Identifier + -> ^(TOK_TABNAME $db? $view) + ; + +subQuerySource +@init { msgs.push("subquery source"); } +@after { msgs.pop(); } + : + LPAREN queryStatementExpression RPAREN Identifier -> ^(TOK_SUBQUERY queryStatementExpression Identifier) + ; + +//----------------------- Rules for parsing whereClause ----------------------------- +// where a=b and ... +whereClause +@init { msgs.push("where clause"); } +@after { msgs.pop(); } + : + KW_WHERE searchCondition -> ^(TOK_WHERE searchCondition) + ; + +searchCondition +@init { msgs.push("search condition"); } +@after { msgs.pop(); } + : + expression + ; + +//----------------------------------------------------------------------------------- + +// group by a,b +groupByClause +@init { msgs.push("group by clause"); } +@after { msgs.pop(); } + : + KW_GROUP KW_BY + groupByExpression + ( COMMA groupByExpression )* + ((rollup=KW_WITH KW_ROLLUP) | (cube=KW_WITH KW_CUBE)) ? 
+ (sets=KW_GROUPING KW_SETS + LPAREN groupingSetExpression ( COMMA groupingSetExpression)* RPAREN ) ? + -> {rollup != null}? ^(TOK_ROLLUP_GROUPBY groupByExpression+) + -> {cube != null}? ^(TOK_CUBE_GROUPBY groupByExpression+) + -> {sets != null}? ^(TOK_GROUPING_SETS groupByExpression+ groupingSetExpression+) + -> ^(TOK_GROUPBY groupByExpression+) + ; + +groupingSetExpression +@init {msgs.push("grouping set expression"); } +@after {msgs.pop(); } + : + groupByExpression + -> ^(TOK_GROUPING_SETS_EXPRESSION groupByExpression) + | + LPAREN + groupByExpression (COMMA groupByExpression)* + RPAREN + -> ^(TOK_GROUPING_SETS_EXPRESSION groupByExpression+) + | + LPAREN + RPAREN + -> ^(TOK_GROUPING_SETS_EXPRESSION) + ; + + +groupByExpression +@init { msgs.push("group by expression"); } +@after { msgs.pop(); } + : + expression + ; + +havingClause +@init { msgs.push("having clause"); } +@after { msgs.pop(); } + : + KW_HAVING havingCondition -> ^(TOK_HAVING havingCondition) + ; + +havingCondition +@init { msgs.push("having condition"); } +@after { msgs.pop(); } + : + expression + ; + +// order by a,b +orderByClause +@init { msgs.push("order by clause"); } +@after { msgs.pop(); } + : + KW_ORDER KW_BY + LPAREN columnRefOrder + ( COMMA columnRefOrder)* RPAREN -> ^(TOK_ORDERBY columnRefOrder+) + | + KW_ORDER KW_BY + columnRefOrder + ( COMMA columnRefOrder)* -> ^(TOK_ORDERBY columnRefOrder+) + ; + +clusterByClause +@init { msgs.push("cluster by clause"); } +@after { msgs.pop(); } + : + KW_CLUSTER KW_BY + LPAREN expression (COMMA expression)* RPAREN -> ^(TOK_CLUSTERBY expression+) + | + KW_CLUSTER KW_BY + expression + ( COMMA expression )* -> ^(TOK_CLUSTERBY expression+) + ; + +distributeByClause +@init { msgs.push("distribute by clause"); } +@after { msgs.pop(); } + : + KW_DISTRIBUTE KW_BY + LPAREN expression (COMMA expression)* RPAREN -> ^(TOK_DISTRIBUTEBY expression+) + | + KW_DISTRIBUTE KW_BY + expression (COMMA expression)* -> ^(TOK_DISTRIBUTEBY expression+) + ; + +sortByClause +@init { msgs.push("sort by clause"); } +@after { msgs.pop(); } + : + KW_SORT KW_BY + LPAREN columnRefOrder + ( COMMA columnRefOrder)* RPAREN -> ^(TOK_SORTBY columnRefOrder+) + | + KW_SORT KW_BY + columnRefOrder + ( COMMA columnRefOrder)* -> ^(TOK_SORTBY columnRefOrder+) + ; + +// fun(par1, par2, par3) +function +@init { msgs.push("function specification"); } +@after { msgs.pop(); } + : + functionName + LPAREN + ( + (star=STAR) + | (dist=KW_DISTINCT)? (expression (COMMA expression)*)? + ) + RPAREN -> {$star != null}? ^(TOK_FUNCTIONSTAR functionName) + -> {$dist == null}? ^(TOK_FUNCTION functionName (expression+)?) + -> ^(TOK_FUNCTIONDI functionName (expression+)?) + ; + +functionName +@init { msgs.push("function name"); } +@after { msgs.pop(); } + : // Keyword IF is also a function name + Identifier | KW_IF | KW_ARRAY | KW_MAP | KW_STRUCT | KW_UNIONTYPE + ; + +castExpression +@init { msgs.push("cast expression"); } +@after { msgs.pop(); } + : + KW_CAST + LPAREN + expression + KW_AS + primitiveType + RPAREN -> ^(TOK_FUNCTION primitiveType expression) + ; + +caseExpression +@init { msgs.push("case expression"); } +@after { msgs.pop(); } + : + KW_CASE expression + (KW_WHEN expression KW_THEN expression)+ + (KW_ELSE expression)? + KW_END -> ^(TOK_FUNCTION KW_CASE expression*) + ; + +whenExpression +@init { msgs.push("case expression"); } +@after { msgs.pop(); } + : + KW_CASE + ( KW_WHEN expression KW_THEN expression)+ + (KW_ELSE expression)? 
+ KW_END -> ^(TOK_FUNCTION KW_WHEN expression*) + ; + +constant +@init { msgs.push("constant"); } +@after { msgs.pop(); } + : + Number + | StringLiteral + | stringLiteralSequence + | BigintLiteral + | SmallintLiteral + | TinyintLiteral + | charSetStringLiteral + | booleanValue + ; + +stringLiteralSequence + : + StringLiteral StringLiteral+ -> ^(TOK_STRINGLITERALSEQUENCE StringLiteral StringLiteral+) + ; + +charSetStringLiteral +@init { msgs.push("character string literal"); } +@after { msgs.pop(); } + : + csName=CharSetName csLiteral=CharSetLiteral -> ^(TOK_CHARSETLITERAL $csName $csLiteral) + ; + +expression +@init { msgs.push("expression specification"); } +@after { msgs.pop(); } + : + precedenceOrExpression + ; + +atomExpression + : + KW_NULL -> TOK_NULL + | constant + | function + | castExpression + | caseExpression + | whenExpression + | tableOrColumn + | LPAREN! expression RPAREN! + ; + + +precedenceFieldExpression + : + atomExpression ((LSQUARE^ expression RSQUARE!) | (DOT^ Identifier))* + ; + +precedenceUnaryOperator + : + PLUS | MINUS | TILDE + ; + +nullCondition + : + KW_NULL -> ^(TOK_ISNULL) + | KW_NOT KW_NULL -> ^(TOK_ISNOTNULL) + ; + +precedenceUnaryPrefixExpression + : + (precedenceUnaryOperator^)* precedenceFieldExpression + ; + +precedenceUnarySuffixExpression + : precedenceUnaryPrefixExpression (a=KW_IS nullCondition)? + -> {$a != null}? ^(TOK_FUNCTION nullCondition precedenceUnaryPrefixExpression) + -> precedenceUnaryPrefixExpression + ; + + +precedenceBitwiseXorOperator + : + BITWISEXOR + ; + +precedenceBitwiseXorExpression + : + precedenceUnarySuffixExpression (precedenceBitwiseXorOperator^ precedenceUnarySuffixExpression)* + ; + + +precedenceStarOperator + : + STAR | DIVIDE | MOD | DIV + ; + +precedenceStarExpression + : + precedenceBitwiseXorExpression (precedenceStarOperator^ precedenceBitwiseXorExpression)* + ; + + +precedencePlusOperator + : + PLUS | MINUS + ; + +precedencePlusExpression + : + precedenceStarExpression (precedencePlusOperator^ precedenceStarExpression)* + ; + + +precedenceAmpersandOperator + : + AMPERSAND + ; + +precedenceAmpersandExpression + : + precedencePlusExpression (precedenceAmpersandOperator^ precedencePlusExpression)* + ; + + +precedenceBitwiseOrOperator + : + BITWISEOR + ; + +precedenceBitwiseOrExpression + : + precedenceAmpersandExpression (precedenceBitwiseOrOperator^ precedenceAmpersandExpression)* + ; + + +// Equal operators supporting NOT prefix +precedenceEqualNegatableOperator + : + KW_LIKE | KW_RLIKE | KW_REGEXP + ; + +precedenceEqualOperator + : + precedenceEqualNegatableOperator | EQUAL | EQUAL_NS | NOTEQUAL | LESSTHANOREQUALTO | LESSTHAN | GREATERTHANOREQUALTO | GREATERTHAN + ; + +precedenceEqualExpression + : + (left=precedenceBitwiseOrExpression -> $left) + ( + (KW_NOT precedenceEqualNegatableOperator notExpr=precedenceBitwiseOrExpression) + -> ^(KW_NOT ^(precedenceEqualNegatableOperator $precedenceEqualExpression $notExpr)) + | (precedenceEqualOperator equalExpr=precedenceBitwiseOrExpression) + -> ^(precedenceEqualOperator $precedenceEqualExpression $equalExpr) + | (KW_NOT KW_IN expressions) + -> ^(KW_NOT ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions)) + | (KW_IN expressions) + -> ^(TOK_FUNCTION KW_IN $precedenceEqualExpression expressions) + | ( KW_NOT KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) ) + -> ^(TOK_FUNCTION Identifier["between"] KW_TRUE $left $min $max) + | ( KW_BETWEEN (min=precedenceBitwiseOrExpression) KW_AND (max=precedenceBitwiseOrExpression) ) + -> 
^(TOK_FUNCTION Identifier["between"] KW_FALSE $left $min $max) + )* + ; + +expressions + : + LPAREN expression (COMMA expression)* RPAREN -> expression* + ; + +precedenceNotOperator + : + KW_NOT + ; + +precedenceNotExpression + : + (precedenceNotOperator^)* precedenceEqualExpression + ; + + +precedenceAndOperator + : + KW_AND + ; + +precedenceAndExpression + : + precedenceNotExpression (precedenceAndOperator^ precedenceNotExpression)* + ; + + +precedenceOrOperator + : + KW_OR + ; + +precedenceOrExpression + : + precedenceAndExpression (precedenceOrOperator^ precedenceAndExpression)* + ; + + +booleanValue + : + KW_TRUE^ | KW_FALSE^ + ; + +tableOrPartition + : + tableName partitionSpec? -> ^(TOK_TAB tableName partitionSpec?) + ; + +partitionSpec + : + KW_PARTITION + LPAREN partitionVal (COMMA partitionVal )* RPAREN -> ^(TOK_PARTSPEC partitionVal +) + ; + +partitionVal + : + Identifier (EQUAL constant)? -> ^(TOK_PARTVAL Identifier constant?) + ; + +dropPartitionSpec + : + KW_PARTITION + LPAREN dropPartitionVal (COMMA dropPartitionVal )* RPAREN -> ^(TOK_PARTSPEC dropPartitionVal +) + ; + +dropPartitionVal + : + Identifier dropPartitionOperator constant -> ^(TOK_PARTVAL Identifier dropPartitionOperator constant) + ; + +dropPartitionOperator + : + EQUAL | NOTEQUAL | LESSTHANOREQUALTO | LESSTHAN | GREATERTHANOREQUALTO | GREATERTHAN + ; + +sysFuncNames + : + KW_AND + | KW_OR + | KW_NOT + | KW_LIKE + | KW_IF + | KW_CASE + | KW_WHEN + | KW_TINYINT + | KW_SMALLINT + | KW_INT + | KW_BIGINT + | KW_FLOAT + | KW_DOUBLE + | KW_BOOLEAN + | KW_STRING + | KW_BINARY + | KW_ARRAY + | KW_MAP + | KW_STRUCT + | KW_UNIONTYPE + | EQUAL + | EQUAL_NS + | NOTEQUAL + | LESSTHANOREQUALTO + | LESSTHAN + | GREATERTHANOREQUALTO + | GREATERTHAN + | DIVIDE + | PLUS + | MINUS + | STAR + | MOD + | DIV + | AMPERSAND + | TILDE + | BITWISEOR + | BITWISEXOR + | KW_RLIKE + | KW_REGEXP + | KW_IN + | KW_BETWEEN + ; + +descFuncNames + : + sysFuncNames + | StringLiteral + | Identifier + ; + +// Keywords + +kwUser +: +{input.LT(1).getText().equalsIgnoreCase("user")}? Identifier; + +kwRole +: +{input.LT(1).getText().equalsIgnoreCase("role")}? Identifier; + +kwInner +: +{input.LT(1).getText().equalsIgnoreCase("inner")}? 
Identifier; + +KW_TRUE : 'TRUE'; +KW_FALSE : 'FALSE'; +KW_ALL : 'ALL'; +KW_AND : 'AND'; +KW_OR : 'OR'; +KW_NOT : 'NOT' | '!'; +KW_LIKE : 'LIKE'; + +KW_IF : 'IF'; +KW_EXISTS : 'EXISTS'; + +KW_ASC : 'ASC'; +KW_DESC : 'DESC'; +KW_ORDER : 'ORDER'; +KW_GROUP : 'GROUP'; +KW_BY : 'BY'; +KW_HAVING : 'HAVING'; +KW_WHERE : 'WHERE'; +KW_FROM : 'FROM'; +KW_AS : 'AS'; +KW_SELECT : 'SELECT'; +KW_DISTINCT : 'DISTINCT'; +KW_INSERT : 'INSERT'; +KW_OVERWRITE : 'OVERWRITE'; +KW_OUTER : 'OUTER'; +KW_UNIQUEJOIN : 'UNIQUEJOIN'; +KW_PRESERVE : 'PRESERVE'; +KW_JOIN : 'JOIN'; +KW_LEFT : 'LEFT'; +KW_RIGHT : 'RIGHT'; +KW_FULL : 'FULL'; +KW_ON : 'ON'; +KW_PARTITION : 'PARTITION'; +KW_PARTITIONS : 'PARTITIONS'; +KW_TABLE: 'TABLE'; +KW_TABLES: 'TABLES'; +KW_COLUMNS: 'COLUMNS'; +KW_INDEX: 'INDEX'; +KW_INDEXES: 'INDEXES'; +KW_REBUILD: 'REBUILD'; +KW_FUNCTIONS: 'FUNCTIONS'; +KW_SHOW: 'SHOW'; +KW_MSCK: 'MSCK'; +KW_REPAIR: 'REPAIR'; +KW_DIRECTORY: 'DIRECTORY'; +KW_LOCAL: 'LOCAL'; +KW_TRANSFORM : 'TRANSFORM'; +KW_USING: 'USING'; +KW_CLUSTER: 'CLUSTER'; +KW_DISTRIBUTE: 'DISTRIBUTE'; +KW_SORT: 'SORT'; +KW_UNION: 'UNION'; +KW_LOAD: 'LOAD'; +KW_EXPORT: 'EXPORT'; +KW_IMPORT: 'IMPORT'; +KW_DATA: 'DATA'; +KW_INPATH: 'INPATH'; +KW_IS: 'IS'; +KW_NULL: 'NULL'; +KW_CREATE: 'CREATE'; +KW_EXTERNAL: 'EXTERNAL'; +KW_ALTER: 'ALTER'; +KW_CHANGE: 'CHANGE'; +KW_COLUMN: 'COLUMN'; +KW_FIRST: 'FIRST'; +KW_AFTER: 'AFTER'; +KW_DESCRIBE: 'DESCRIBE'; +KW_DROP: 'DROP'; +KW_RENAME: 'RENAME'; +KW_IGNORE: 'IGNORE'; +KW_PROTECTION: 'PROTECTION'; +KW_TO: 'TO'; +KW_COMMENT: 'COMMENT'; +KW_BOOLEAN: 'BOOLEAN'; +KW_TINYINT: 'TINYINT'; +KW_SMALLINT: 'SMALLINT'; +KW_INT: 'INT'; +KW_BIGINT: 'BIGINT'; +KW_FLOAT: 'FLOAT'; +KW_DOUBLE: 'DOUBLE'; +KW_DATE: 'DATE'; +KW_DATETIME: 'DATETIME'; +KW_TIMESTAMP: 'TIMESTAMP'; +KW_STRING: 'STRING'; +KW_ARRAY: 'ARRAY'; +KW_STRUCT: 'STRUCT'; +KW_MAP: 'MAP'; +KW_UNIONTYPE: 'UNIONTYPE'; +KW_REDUCE: 'REDUCE'; +KW_PARTITIONED: 'PARTITIONED'; +KW_CLUSTERED: 'CLUSTERED'; +KW_SORTED: 'SORTED'; +KW_INTO: 'INTO'; +KW_BUCKETS: 'BUCKETS'; +KW_ROW: 'ROW'; +KW_ROWS: 'ROWS'; +KW_FORMAT: 'FORMAT'; +KW_DELIMITED: 'DELIMITED'; +KW_FIELDS: 'FIELDS'; +KW_TERMINATED: 'TERMINATED'; +KW_ESCAPED: 'ESCAPED'; +KW_COLLECTION: 'COLLECTION'; +KW_ITEMS: 'ITEMS'; +KW_KEYS: 'KEYS'; +KW_KEY_TYPE: '$KEY$'; +KW_LINES: 'LINES'; +KW_STORED: 'STORED'; +KW_FILEFORMAT: 'FILEFORMAT'; +KW_SEQUENCEFILE: 'SEQUENCEFILE'; +KW_TEXTFILE: 'TEXTFILE'; +KW_RCFILE: 'RCFILE'; +KW_INPUTFORMAT: 'INPUTFORMAT'; +KW_OUTPUTFORMAT: 'OUTPUTFORMAT'; +KW_INPUTDRIVER: 'INPUTDRIVER'; +KW_OUTPUTDRIVER: 'OUTPUTDRIVER'; +KW_OFFLINE: 'OFFLINE'; +KW_ENABLE: 'ENABLE'; +KW_DISABLE: 'DISABLE'; +KW_READONLY: 'READONLY'; +KW_NO_DROP: 'NO_DROP'; +KW_LOCATION: 'LOCATION'; +KW_TABLESAMPLE: 'TABLESAMPLE'; +KW_BUCKET: 'BUCKET'; +KW_OUT: 'OUT'; +KW_OF: 'OF'; +KW_PERCENT: 'PERCENT'; +KW_CAST: 'CAST'; +KW_ADD: 'ADD'; +KW_REPLACE: 'REPLACE'; +KW_RLIKE: 'RLIKE'; +KW_REGEXP: 'REGEXP'; +KW_TEMPORARY: 'TEMPORARY'; +KW_FUNCTION: 'FUNCTION'; +KW_EXPLAIN: 'EXPLAIN'; +KW_EXTENDED: 'EXTENDED'; +KW_FORMATTED: 'FORMATTED'; +KW_DEPENDENCY: 'DEPENDENCY'; +KW_SERDE: 'SERDE'; +KW_WITH: 'WITH'; +KW_DEFERRED: 'DEFERRED'; +KW_SERDEPROPERTIES: 'SERDEPROPERTIES'; +KW_DBPROPERTIES: 'DBPROPERTIES'; +KW_LIMIT: 'LIMIT'; +KW_SET: 'SET'; +KW_TBLPROPERTIES: 'TBLPROPERTIES'; +KW_IDXPROPERTIES: 'IDXPROPERTIES'; +KW_VALUE_TYPE: '$VALUE$'; +KW_ELEM_TYPE: '$ELEM$'; +KW_CASE: 'CASE'; +KW_WHEN: 'WHEN'; +KW_THEN: 'THEN'; +KW_ELSE: 'ELSE'; +KW_END: 'END'; +KW_MAPJOIN: 'MAPJOIN'; +KW_STREAMTABLE: 'STREAMTABLE'; +KW_HOLD_DDLTIME: 'HOLD_DDLTIME'; 
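
The precedence*Expression rules above are the standard left-recursion-free encoding of operator precedence: each rule matches the next-tighter level, then folds any number of same-level operators left to right, with ANTLR's `^` suffix hoisting each operator into the AST as it is consumed. A minimal hand-written analogue of two adjacent levels is sketched below (plain Java; the class and method names are invented for illustration and are not part of Hive):

```java
import java.util.ArrayDeque;
import java.util.Arrays;
import java.util.Deque;

// Illustrative only: mirrors the grammar's precedence chaining, e.g.
//   plusExpression : starExpression (('+'|'-') starExpression)*
public class PrecedenceChainSketch {
    private final Deque<String> tokens;

    PrecedenceChainSketch(String... toks) {
        tokens = new ArrayDeque<String>(Arrays.asList(toks));
    }

    // Looser level: delegates to starExpression, then folds '+'/'-' left to right.
    long plusExpression() {
        long v = starExpression();
        while ("+".equals(tokens.peek()) || "-".equals(tokens.peek())) {
            boolean add = tokens.poll().equals("+");
            long rhs = starExpression();
            v = add ? v + rhs : v - rhs;
        }
        return v;
    }

    // Tighter level: binds before '+'/'-' simply because it is nested deeper.
    long starExpression() {
        long v = atom();
        while ("*".equals(tokens.peek())) {
            tokens.poll();
            v = v * atom();
        }
        return v;
    }

    long atom() {
        return Long.parseLong(tokens.poll());
    }

    public static void main(String[] args) {
        // 1 + 2 * 3 folds as 1 + (2 * 3) = 7.
        System.out.println(new PrecedenceChainSketch("1", "+", "2", "*", "3").plusExpression());
    }
}
```

Read bottom-up, the chain from precedenceBitwiseXorExpression through precedenceOrExpression fixes HiveQL's operator precedence the same way. Note that it makes `&` and `|` bind more loosely than `+`, and `^` more tightly than `*`, which is not the ordering Java or C programmers may expect.
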
+KW_CLUSTERSTATUS: 'CLUSTERSTATUS'; +KW_UTC: 'UTC'; +KW_UTCTIMESTAMP: 'UTC_TMESTAMP'; +KW_LONG: 'LONG'; +KW_DELETE: 'DELETE'; +KW_PLUS: 'PLUS'; +KW_MINUS: 'MINUS'; +KW_FETCH: 'FETCH'; +KW_INTERSECT: 'INTERSECT'; +KW_VIEW: 'VIEW'; +KW_IN: 'IN'; +KW_DATABASE: 'DATABASE'; +KW_DATABASES: 'DATABASES'; +KW_MATERIALIZED: 'MATERIALIZED'; +KW_SCHEMA: 'SCHEMA'; +KW_SCHEMAS: 'SCHEMAS'; +KW_GRANT: 'GRANT'; +KW_REVOKE: 'REVOKE'; +KW_SSL: 'SSL'; +KW_UNDO: 'UNDO'; +KW_LOCK: 'LOCK'; +KW_LOCKS: 'LOCKS'; +KW_UNLOCK: 'UNLOCK'; +KW_SHARED: 'SHARED'; +KW_EXCLUSIVE: 'EXCLUSIVE'; +KW_PROCEDURE: 'PROCEDURE'; +KW_UNSIGNED: 'UNSIGNED'; +KW_WHILE: 'WHILE'; +KW_READ: 'READ'; +KW_READS: 'READS'; +KW_PURGE: 'PURGE'; +KW_RANGE: 'RANGE'; +KW_ANALYZE: 'ANALYZE'; +KW_BEFORE: 'BEFORE'; +KW_BETWEEN: 'BETWEEN'; +KW_BOTH: 'BOTH'; +KW_BINARY: 'BINARY'; +KW_CROSS: 'CROSS'; +KW_CONTINUE: 'CONTINUE'; +KW_CURSOR: 'CURSOR'; +KW_TRIGGER: 'TRIGGER'; +KW_RECORDREADER: 'RECORDREADER'; +KW_RECORDWRITER: 'RECORDWRITER'; +KW_SEMI: 'SEMI'; +KW_LATERAL: 'LATERAL'; +KW_TOUCH: 'TOUCH'; +KW_ARCHIVE: 'ARCHIVE'; +KW_UNARCHIVE: 'UNARCHIVE'; +KW_COMPUTE: 'COMPUTE'; +KW_STATISTICS: 'STATISTICS'; +KW_USE: 'USE'; +KW_OPTION: 'OPTION'; +KW_CONCATENATE: 'CONCATENATE'; +KW_SHOW_DATABASE: 'SHOW_DATABASE'; +KW_UPDATE: 'UPDATE'; +KW_RESTRICT: 'RESTRICT'; +KW_CASCADE: 'CASCADE'; +KW_SKEWED: 'SKEWED'; +KW_ROLLUP: 'ROLLUP'; +KW_CUBE: 'CUBE'; +KW_DIRECTORIES: 'DIRECTORIES'; +KW_FOR: 'FOR'; +KW_GROUPING: 'GROUPING'; +KW_SETS: 'SETS'; + +// Operators +// NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work. + +DOT : '.'; // generated as a part of Number rule +COLON : ':' ; +COMMA : ',' ; +SEMICOLON : ';' ; + +LPAREN : '(' ; +RPAREN : ')' ; +LSQUARE : '[' ; +RSQUARE : ']' ; +LCURLY : '{'; +RCURLY : '}'; + +EQUAL : '=' | '=='; +EQUAL_NS : '<=>'; +NOTEQUAL : '<>' | '!='; +LESSTHANOREQUALTO : '<='; +LESSTHAN : '<'; +GREATERTHANOREQUALTO : '>='; +GREATERTHAN : '>'; + +DIVIDE : '/'; +PLUS : '+'; +MINUS : '-'; +STAR : '*'; +MOD : '%'; +DIV : 'DIV'; + +AMPERSAND : '&'; +TILDE : '~'; +BITWISEOR : '|'; +BITWISEXOR : '^'; +QUESTION : '?'; +DOLLAR : '$'; + +// LITERALS +fragment +Letter + : 'a'..'z' | 'A'..'Z' + ; + +fragment +HexDigit + : 'a'..'f' | 'A'..'F' + ; + +fragment +Digit + : + '0'..'9' + ; + +fragment +Exponent + : + ('e' | 'E') ( PLUS|MINUS )? (Digit)+ + ; + +fragment +RegexComponent + : 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' + | PLUS | STAR | QUESTION | MINUS | DOT + | LPAREN | RPAREN | LSQUARE | RSQUARE | LCURLY | RCURLY + | BITWISEXOR | BITWISEOR | DOLLAR + ; + +StringLiteral + : + ( '\'' ( ~('\''|'\\') | ('\\' .) )* '\'' + | '\"' ( ~('\"'|'\\') | ('\\' .) )* '\"' + )+ + ; + +CharSetLiteral + : + StringLiteral + | '0' 'X' (HexDigit|Digit)+ + ; + +BigintLiteral + : + (Digit)+ 'L' + ; + +SmallintLiteral + : + (Digit)+ 'S' + ; + +TinyintLiteral + : + (Digit)+ 'Y' + ; + +ByteLengthLiteral + : + (Digit)+ ('b' | 'B' | 'k' | 'K' | 'm' | 'M' | 'g' | 'G') + ; + +Number + : + (Digit)+ ( DOT (Digit)* (Exponent)? | Exponent)? + ; + +Identifier + : + (Letter | Digit) (Letter | Digit | '_')* + | '`' RegexComponent+ '`' + ; + +CharSetName + : + '_' (Letter | Digit | '_' | '-' | '.' 
| ':' )+ + ; + +WS : (' '|'\r'|'\t'|'\n') {$channel=HIDDEN;} + ; + +COMMENT + : '--' (~('\n'|'\r'))* + { $channel=HIDDEN; } + ; + diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java index 38e4a4f..c267ad5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java @@ -501,6 +501,8 @@ public final class TypeCheckProcFactory { serdeConstants.BINARY_TYPE_NAME); conversionFunctionTextHashMap.put(HiveParser.TOK_TIMESTAMP, serdeConstants.TIMESTAMP_TYPE_NAME); + conversionFunctionTextHashMap.put(HiveParser.TOK_DECIMAL, + serdeConstants.DECIMAL_TYPE_NAME); } public static boolean isRedundantConversionFunction(ASTNode expr, diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAbs.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAbs.java index 41043bc..bfce482 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAbs.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFAbs.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; @@ -38,6 +39,7 @@ public class UDFAbs extends UDF { private final DoubleWritable resultDouble = new DoubleWritable(); private final LongWritable resultLong = new LongWritable(); private final IntWritable resultInt = new IntWritable(); + private final BigDecimalWritable resultBigDecimal = new BigDecimalWritable(); public DoubleWritable evaluate(DoubleWritable n) { if (n == null) { @@ -68,4 +70,13 @@ public class UDFAbs extends UDF { return resultInt; } + + public BigDecimalWritable evaluate(BigDecimalWritable n) { + if (n == null) { + return null; + } + + resultBigDecimal.set(n.getBigDecimal().abs()); + return resultBigDecimal; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericOp.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericOp.java index 63d0255..14c16ec 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericOp.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericOp.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.NumericOpMethodResolver; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -48,6 +49,7 @@ public abstract class UDFBaseNumericOp extends UDF { protected LongWritable longWritable = new LongWritable(); protected FloatWritable floatWritable = new FloatWritable(); protected DoubleWritable doubleWritable = new DoubleWritable(); + protected BigDecimalWritable bigDecimalWritable = new BigDecimalWritable(); public abstract ByteWritable evaluate(ByteWritable a, ByteWritable b); @@ -61,4 +63,5 @@ public abstract class UDFBaseNumericOp extends UDF { public abstract DoubleWritable evaluate(DoubleWritable a, DoubleWritable b); + public abstract BigDecimalWritable evaluate(BigDecimalWritable a, BigDecimalWritable b); } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericUnaryOp.java 
ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericUnaryOp.java index b220805..cb7dca4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericUnaryOp.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFBaseNumericUnaryOp.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -45,6 +46,7 @@ public abstract class UDFBaseNumericUnaryOp extends UDF { protected LongWritable longWritable = new LongWritable(); protected FloatWritable floatWritable = new FloatWritable(); protected DoubleWritable doubleWritable = new DoubleWritable(); + protected BigDecimalWritable bigDecimalWritable = new BigDecimalWritable(); public abstract ByteWritable evaluate(ByteWritable a); @@ -58,4 +60,5 @@ public abstract class UDFBaseNumericUnaryOp extends UDF { public abstract DoubleWritable evaluate(DoubleWritable a); + public abstract BigDecimalWritable evaluate(BigDecimalWritable a); } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCeil.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCeil.java index 01dd4d6..d40a1eb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCeil.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCeil.java @@ -18,8 +18,13 @@ package org.apache.hadoop.hive.ql.udf; +import java.math.BigDecimal; +import java.math.MathContext; +import java.math.RoundingMode; + import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.io.LongWritable; @@ -34,7 +39,8 @@ import org.apache.hadoop.io.LongWritable; + " 0\n" + " > SELECT _FUNC_(5) FROM src LIMIT 1;\n" + " 5") public class UDFCeil extends UDF { - private LongWritable longWritable = new LongWritable(); + private final LongWritable longWritable = new LongWritable(); + private final BigDecimalWritable bigDecimalWritable = new BigDecimalWritable(); public UDFCeil() { } @@ -48,4 +54,15 @@ public class UDFCeil extends UDF { } } + public BigDecimalWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + BigDecimal bd = i.getBigDecimal(); + MathContext mc = new MathContext( + bd.precision() - bd.scale(), RoundingMode.CEILING); + bigDecimalWritable.set(bd.round(mc)); + return bigDecimalWritable; + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFExp.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFExp.java index 35b0987..10ae445 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFExp.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFExp.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; /** @@ -31,7 +32,7 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; extended = "Example:\n " + " > SELECT _FUNC_(0) FROM src LIMIT 1;\n" + " 1") public class UDFExp extends UDF { - private DoubleWritable result = new DoubleWritable(); + private final DoubleWritable result = new DoubleWritable(); public UDFExp() { } @@ -48,4 +49,12 @@ public class UDFExp extends UDF { } } + public DoubleWritable 
evaluate(BigDecimalWritable a) { + if (a == null) { + return null; + } else { + result.set(Math.exp(a.getBigDecimal().doubleValue())); + return result; + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFloor.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFloor.java index 510a161..397a86e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFloor.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFFloor.java @@ -18,8 +18,13 @@ package org.apache.hadoop.hive.ql.udf; +import java.math.BigDecimal; +import java.math.MathContext; +import java.math.RoundingMode; + import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.io.LongWritable; @@ -34,7 +39,8 @@ import org.apache.hadoop.io.LongWritable; + " -1\n" + " > SELECT _FUNC_(5) FROM src LIMIT 1;\n" + " 5") public class UDFFloor extends UDF { - private LongWritable result = new LongWritable(); + private final LongWritable result = new LongWritable(); + private final BigDecimalWritable bdResult = new BigDecimalWritable(); public UDFFloor() { } @@ -48,4 +54,15 @@ public class UDFFloor extends UDF { } } + public BigDecimalWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + BigDecimal bd = i.getBigDecimal(); + MathContext mc = new MathContext( + bd.precision() - bd.scale(), RoundingMode.FLOOR); + bdResult.set(bd.round(mc)); + return bdResult; + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java index 6aa7f6a..b153fbf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; /** @@ -31,7 +32,7 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; extended = "Example:\n" + " > SELECT _FUNC_(13, 13) FROM src LIMIT 1;\n" + " 1") public class UDFLog extends UDF { - private DoubleWritable result = new DoubleWritable(); + private final DoubleWritable result = new DoubleWritable(); public UDFLog() { } @@ -48,6 +49,19 @@ public class UDFLog extends UDF { } } + public DoubleWritable evaluate(BigDecimalWritable a) { + if (a == null) { + return null; + } else { + double v = a.getBigDecimal().doubleValue(); + if (v < 0) { + return null; + } + result.set(Math.log(v)); + return result; + } + } + /** * Returns the logarithm of "a" with base "base". */ @@ -60,4 +74,19 @@ public class UDFLog extends UDF { } } + /** + * Returns the logarithm of "a" with base "base". 
+ */ + public DoubleWritable evaluate(DoubleWritable base, BigDecimalWritable a) { + if (a == null || base == null || base.get() <= 1.0) { + return null; + } else { + double v = a.getBigDecimal().doubleValue(); + if (v <= 0.0) { + return null; + } + result.set(Math.log(v) / Math.log(base.get())); + return result; + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog10.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog10.java index a6e7c5e..d49384e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog10.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog10.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; /** @@ -33,7 +34,7 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; public class UDFLog10 extends UDF { private static double log10 = Math.log(10.0); - private DoubleWritable result = new DoubleWritable(); + private final DoubleWritable result = new DoubleWritable(); public UDFLog10() { } @@ -50,4 +51,20 @@ public class UDFLog10 extends UDF { } } + /** + * Returns the logarithm of "a" with base 10. + */ + public DoubleWritable evaluate(BigDecimalWritable a) { + if (a == null) { + return null; + } else { + double v = a.getBigDecimal().doubleValue(); + if (v < 0) { + return null; + } + result.set(Math.log(v) / log10); + return result; + } + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog2.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog2.java index 87c6d22..1d3af4b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog2.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFLog2.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; /** @@ -33,7 +34,7 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; public class UDFLog2 extends UDF { private static double log2 = Math.log(2.0); - private DoubleWritable result = new DoubleWritable(); + private final DoubleWritable result = new DoubleWritable(); public UDFLog2() { } @@ -50,4 +51,17 @@ public class UDFLog2 extends UDF { } } + public DoubleWritable evaluate(BigDecimalWritable a) { + if (a == null) { + return null; + } else { + double v = a.getBigDecimal().doubleValue(); + if (v < 0) { + return null; + } + result.set(Math.log(v) / log2); + return result; + } + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java index 0455aa9..8d0a059 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPDivide.java @@ -18,8 +18,11 @@ package org.apache.hadoop.hive.ql.udf; +import java.math.BigDecimal; + import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; /** @@ -29,12 +32,13 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; @Description(name = "/", value = "a _FUNC_ b - Divide a by b", extended = "Example:\n" + " > SELECT 3 _FUNC_ 2 FROM src LIMIT 1;\n" + " 1.5") /** - * Note that in SQL, the return type of divide is not necessarily the same + 
* Note that in SQL, the return type of divide is not necessarily the same * as the parameters. For example, 3 / 2 = 1.5, not 1. To follow SQL, we always * return a double for divide. */ public class UDFOPDivide extends UDF { - private DoubleWritable doubleWritable = new DoubleWritable(); + private final DoubleWritable doubleWritable = new DoubleWritable(); + private final BigDecimalWritable bigDecimalWritable = new BigDecimalWritable(); public DoubleWritable evaluate(DoubleWritable a, DoubleWritable b) { // LOG.info("Get input " + a.getClass() + ":" + a + " " + b.getClass() + ":" @@ -46,4 +50,18 @@ public class UDFOPDivide extends UDF { doubleWritable.set(a.get() / b.get()); return doubleWritable; } + + public BigDecimalWritable evaluate(BigDecimalWritable a, BigDecimalWritable b) { + if ((a == null) || (b == null)) { + return null; + } + if (b.getBigDecimal().compareTo(BigDecimal.ZERO) == 0) { + return null; + //bigDecimalWritable.set((BigDecimal)null); + } else { + bigDecimalWritable.set(a.getBigDecimal().divide(b.getBigDecimal())); + } + + return bigDecimalWritable; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMinus.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMinus.java index 8ed1cc6..f884b9a 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMinus.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMinus.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -107,4 +108,15 @@ public class UDFOPMinus extends UDFBaseNumericOp { doubleWritable.set(a.get() - b.get()); return doubleWritable; } + + @Override + public BigDecimalWritable evaluate(BigDecimalWritable a, BigDecimalWritable b) { + + if ((a == null) || (b == null)) { + return null; + } + + bigDecimalWritable.set(a.getBigDecimal().subtract(b.getBigDecimal())); + return bigDecimalWritable; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMod.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMod.java index 1935f03..95bbf45 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMod.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMod.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -107,4 +108,14 @@ public class UDFOPMod extends UDFBaseNumericOp { doubleWritable.set(a.get() % b.get()); return doubleWritable; } + + @Override + public BigDecimalWritable evaluate(BigDecimalWritable a, BigDecimalWritable b) { + if ((a == null) || (b == null)) { + return null; + } + + bigDecimalWritable.set(a.getBigDecimal().remainder(b.getBigDecimal())); + return bigDecimalWritable; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMultiply.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMultiply.java index ce2c515..9058651 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMultiply.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPMultiply.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; +import 
org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -107,4 +108,14 @@ public class UDFOPMultiply extends UDFBaseNumericOp { doubleWritable.set(a.get() * b.get()); return doubleWritable; } + + @Override + public BigDecimalWritable evaluate(BigDecimalWritable a, BigDecimalWritable b) { + if ((a == null) || (b == null)) { + return null; + } + + bigDecimalWritable.set(a.getBigDecimal().multiply(b.getBigDecimal())); + return bigDecimalWritable; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNegative.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNegative.java index 2378df2..3c14fef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNegative.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPNegative.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -90,4 +91,13 @@ public class UDFOPNegative extends UDFBaseNumericUnaryOp { return doubleWritable; } + @Override + public BigDecimalWritable evaluate(BigDecimalWritable a) { + if (a == null) { + return null; + } + bigDecimalWritable.set(a.getBigDecimal().negate()); + return bigDecimalWritable; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPlus.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPlus.java index 705c6eb..5722d8b 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPlus.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPlus.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -113,4 +114,14 @@ public class UDFOPPlus extends UDFBaseNumericOp { return doubleWritable; } + @Override + public BigDecimalWritable evaluate(BigDecimalWritable a, BigDecimalWritable b) { + if ((a == null) || (b == null)) { + return null; + } + + bigDecimalWritable.set(a.getBigDecimal().add(b.getBigDecimal())); + return bigDecimalWritable; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPositive.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPositive.java index c2c45e4..0711890 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPositive.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFOPPositive.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -66,4 +67,9 @@ public class UDFOPPositive extends UDFBaseNumericUnaryOp { return a; } + @Override + public BigDecimalWritable evaluate(BigDecimalWritable a) { + return a; + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPosMod.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPosMod.java index 3b86e9c..4467a90 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPosMod.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPosMod.java @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.udf; +import java.math.BigDecimal; + import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -108,4 +111,16 @@ public class UDFPosMod extends UDFBaseNumericOp { doubleWritable.set(((a.get() % b.get()) + b.get()) % b.get()); return doubleWritable; } + + @Override + public BigDecimalWritable evaluate(BigDecimalWritable a, BigDecimalWritable b) { + if ((a == null) || (b == null)) { + return null; + } + + BigDecimal av = a.getBigDecimal(); + BigDecimal bv = b.getBigDecimal(); + bigDecimalWritable.set(av.remainder(bv).add(bv).remainder(bv)); + return bigDecimalWritable; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPower.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPower.java index 197adbb..9e211a3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPower.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFPower.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; /** @@ -31,7 +32,8 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; extended = "Example:\n" + " > SELECT _FUNC_(2, 3) FROM src LIMIT 1;\n" + " 8") public class UDFPower extends UDF { - private DoubleWritable result = new DoubleWritable(); + private final DoubleWritable resultDouble = new DoubleWritable(); + private final BigDecimalWritable resultBigDecimal = new BigDecimalWritable(); public UDFPower() { } @@ -43,9 +45,8 @@ public class UDFPower extends UDF { if (a == null || b == null) { return null; } else { - result.set(Math.pow(a.get(), b.get())); - return result; + resultDouble.set(Math.pow(a.get(), b.get())); + return resultDouble; } } - } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRound.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRound.java index 892c0d3..29f13a4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRound.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFRound.java @@ -23,6 +23,7 @@ import java.math.RoundingMode; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.io.IntWritable; @@ -35,6 +36,7 @@ import org.apache.hadoop.io.IntWritable; extended = "Example:\n" + " > SELECT _FUNC_(12.3456, 1) FROM src LIMIT 1;\n" + " 12.3'") public class UDFRound extends UDF { + private final BigDecimalWritable bigDecimalWritable = new BigDecimalWritable(); private final DoubleWritable doubleWritable = new DoubleWritable(); public UDFRound() { @@ -65,4 +67,30 @@ public class UDFRound extends UDF { return evaluate(n, i.get()); } + private BigDecimalWritable evaluate(BigDecimalWritable n, int i) { + if (n == null) { + return null; + } + BigDecimal bd = n.getBigDecimal().setScale(i, RoundingMode.HALF_UP); + /* + int setPrecision = bd.precision() - (bd.scale() - i); + MathContext mc = new MathContext((setPrecision > -1 ? 
setPrecision : 0), + RoundingMode.HALF_UP); + bigDecimalWritable.set(bd.round(mc)); + */ + bigDecimalWritable.set(bd); + return bigDecimalWritable; + } + + public BigDecimalWritable evaluate(BigDecimalWritable n) { + return evaluate(n, 0); + } + + public BigDecimalWritable evaluate(BigDecimalWritable n, IntWritable i) { + if (i == null) { + return null; + } + return evaluate(n, i.get()); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSqrt.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSqrt.java index 6717e9d..67343ae 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSqrt.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSqrt.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; /** @@ -30,7 +31,7 @@ import org.apache.hadoop.hive.serde2.io.DoubleWritable; extended = "Example:\n " + " > SELECT _FUNC_(4) FROM src LIMIT 1;\n" + " 2") public class UDFSqrt extends UDF { - private DoubleWritable result = new DoubleWritable(); + private final DoubleWritable result = new DoubleWritable(); public UDFSqrt() { } @@ -49,4 +50,17 @@ public class UDFSqrt extends UDF { } } + public DoubleWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + double v = i.getBigDecimal().doubleValue(); + if (v < 0) { + return null; + } else { + result.set(Math.sqrt(i.getBigDecimal().doubleValue())); + return result; + } + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java index f3afd33..e7001ae 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToBoolean.java @@ -18,7 +18,10 @@ package org.apache.hadoop.hive.ql.udf; +import java.math.BigDecimal; + import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -35,7 +38,7 @@ import org.apache.hadoop.io.Text; * */ public class UDFToBoolean extends UDF { - private BooleanWritable booleanWritable = new BooleanWritable(); + private final BooleanWritable booleanWritable = new BooleanWritable(); public UDFToBoolean() { } @@ -172,4 +175,13 @@ public class UDFToBoolean extends UDF { } } + public BooleanWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + booleanWritable.set(!BigDecimal.ZERO.equals(i.getBigDecimal())); + return booleanWritable; + } + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java index 1b3b744..c5830ea 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToByte.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -36,7 +37,7 @@ import org.apache.hadoop.io.Text; * */ public class UDFToByte extends UDF { - private ByteWritable byteWritable = new ByteWritable(); + private 
final ByteWritable byteWritable = new ByteWritable(); public UDFToByte() { } @@ -181,4 +182,12 @@ public class UDFToByte extends UDF { } } + public ByteWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + byteWritable.set(i.getBigDecimal().byteValue()); + return byteWritable; + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java index ce4660c..c57e31e 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToDouble.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -34,7 +35,7 @@ import org.apache.hadoop.io.Text; * */ public class UDFToDouble extends UDF { - private DoubleWritable doubleWritable = new DoubleWritable(); + private final DoubleWritable doubleWritable = new DoubleWritable(); public UDFToDouble() { } @@ -183,4 +184,12 @@ public class UDFToDouble extends UDF { } } + public DoubleWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + doubleWritable.set(i.getBigDecimal().doubleValue()); + return doubleWritable; + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java index c6b197e..61591e9 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToFloat.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -35,7 +36,7 @@ import org.apache.hadoop.io.Text; * */ public class UDFToFloat extends UDF { - private FloatWritable floatWritable = new FloatWritable(); + private final FloatWritable floatWritable = new FloatWritable(); public UDFToFloat() { } @@ -184,4 +185,13 @@ public class UDFToFloat extends UDF { } } + public FloatWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + floatWritable.set(i.getBigDecimal().floatValue()); + return floatWritable; + } + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java index 9b9d7df..018b3de 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToInteger.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -36,7 +37,7 @@ import org.apache.hadoop.io.Text; * */ public class UDFToInteger extends UDF { - private IntWritable intWritable = new IntWritable(); + private final IntWritable intWritable = new IntWritable(); public UDFToInteger() { } @@ -188,4 +189,13 @@ public class UDFToInteger extends UDF { } } + public IntWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return 
null; + } else { + intWritable.set(i.getBigDecimal().intValue()); + return intWritable; + } + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java index c7ea66d..426bc64 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToLong.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -36,7 +37,7 @@ import org.apache.hadoop.io.Text; * */ public class UDFToLong extends UDF { - private LongWritable longWritable = new LongWritable(); + private final LongWritable longWritable = new LongWritable(); public UDFToLong() { } @@ -192,4 +193,13 @@ public class UDFToLong extends UDF { } } + public LongWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + longWritable.set(i.getBigDecimal().longValue()); + return longWritable; + } + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java index 558d405..5f42865 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToShort.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -181,4 +182,13 @@ public class UDFToShort extends UDF { } } + public ShortWritable evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + shortWritable.set(i.getBigDecimal().shortValue()); + return shortWritable; + } + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java index 4a38f8c..1d06eb3 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFToString.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.udf; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -141,10 +142,19 @@ public class UDFToString extends UDF { } } + public Text evaluate(BigDecimalWritable i) { + if (i == null) { + return null; + } else { + t.set(i.toString()); + return t; + } + } + public Text evaluate (BytesWritable bw) { if (null == bw) { return null; -} + } t.set(bw.getBytes(),0,bw.getLength()); return t; } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java index 43ee547..d5c8e14 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCorrelation.java @@ -102,6 +102,7 @@ public class GenericUDAFCorrelation extends AbstractGenericUDAFResolver { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: 
switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) { case BYTE: case SHORT: @@ -110,6 +111,7 @@ public class GenericUDAFCorrelation extends AbstractGenericUDAFResolver { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: return new GenericUDAFCorrelationEvaluator(); case STRING: case BOOLEAN: diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java index fdcedfb..f7f24f5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovariance.java @@ -93,6 +93,7 @@ public class GenericUDAFCovariance extends AbstractGenericUDAFResolver { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) { case BYTE: case SHORT: @@ -101,6 +102,7 @@ public class GenericUDAFCovariance extends AbstractGenericUDAFResolver { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: return new GenericUDAFCovarianceEvaluator(); case STRING: case BOOLEAN: diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovarianceSample.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovarianceSample.java index ef3023e..ecf7151 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovarianceSample.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCovarianceSample.java @@ -67,6 +67,7 @@ public class GenericUDAFCovarianceSample extends GenericUDAFCovariance { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: switch (((PrimitiveTypeInfo) parameters[1]).getPrimitiveCategory()) { case BYTE: case SHORT: @@ -75,6 +76,7 @@ public class GenericUDAFCovarianceSample extends GenericUDAFCovariance { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: return new GenericUDAFCovarianceSampleEvaluator(); case STRING: case BOOLEAN: diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java index e0f81e0..b31f1da 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFHistogramNumeric.java @@ -45,7 +45,7 @@ import org.apache.hadoop.util.StringUtils; /** * Computes an approximate histogram of a numerical column using a user-specified number of bins. - * + * * The output is an array of (x,y) pairs as Hive struct objects that represents the histogram's * bin centers and heights. 
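 *
 * Note on the DECIMAL category admitted below: the histogram accumulates
 * double values internally, so decimal inputs are binned at double precision.
 * (This is inferred from the evaluator's double-based buffer, not stated in
 * this hunk.)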
*/ @@ -72,7 +72,7 @@ public class GenericUDAFHistogramNumeric extends AbstractGenericUDAFResolver { throw new UDFArgumentTypeException(parameters.length - 1, "Please specify exactly two arguments."); } - + // validate the first parameter, which is the expression to compute over if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { throw new UDFArgumentTypeException(0, @@ -87,6 +87,7 @@ case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: break; case STRING: case BOOLEAN: @@ -170,7 +171,7 @@ @Override public Object terminatePartial(AggregationBuffer agg) throws HiveException { - // Return a single ArrayList where the first element is the number of histogram bins, + // Return a single ArrayList where the first element is the number of histogram bins, // and subsequent elements represent histogram (x,y) pairs. StdAgg myagg = (StdAgg) agg; return myagg.histogram.serialize(); @@ -233,7 +234,7 @@ } - // Aggregation buffer definition and manipulation methods + // Aggregation buffer definition and manipulation methods static class StdAgg implements AggregationBuffer { NumericHistogram histogram; // the histogram object }; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java index 4193a97..eab6e1e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFPercentileApprox.java @@ -46,7 +46,7 @@ import org.apache.hadoop.util.StringUtils; /** * Computes an approximate percentile (quantile) from an approximate histogram, for very * large numbers of rows where the regular percentile() UDAF might run out of memory. - * + * * The input is a single double value or an array of double values representing the quantiles * requested. The output, corresponding to the input, is either a single double value or an * array of doubles that are the quantile values. @@ -59,7 +59,7 @@ import org.apache.hadoop.util.StringUtils; extended = "'expr' can be any numeric column, including doubles and floats, and 'pc' is " + "either a single double/float with a requested percentile, or an array of double/" + "float with multiple percentiles. If 'nb' is not specified, the default " + - "approximation is done with 10,000 histogram bins, which means that if there are " + + "approximation is done with 10,000 histogram bins, which means that if there are " + "10,000 or fewer unique values in 'expr', you can expect an exact result. The " + "percentile() function always computes an exact percentile and can run out of " + "memory if there are too many unique values in a column, which necessitates " + @@ -77,7 +77,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { throw new UDFArgumentTypeException(parameters.length - 1, "Please specify either two or three arguments."); } - + // Validate the first parameter, which is the expression to compute over. This should be a // numeric primitive type.
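    // With DECIMAL admitted by the category check below, a query such as
    //
    //   SELECT percentile_approx(dec_col, 0.5) FROM t;   -- dec_col decimal
    //
    // resolves like any other numeric column. The underlying histogram still
    // works on doubles, so decimal inputs are approximated at double
    // precision (again inferred from the evaluator, not from this hunk).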
if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE) { @@ -93,6 +93,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { case FLOAT: case DOUBLE: case TIMESTAMP: + case DECIMAL: break; default: throw new UDFArgumentTypeException(0, @@ -147,7 +148,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { // Also make sure it is a constant. if (!ObjectInspectorUtils.isConstantObjectInspector(parameters[1])) { throw new UDFArgumentTypeException(1, - "The second argument must be a constant, but " + parameters[1].getTypeName() + + "The second argument must be a constant, but " + parameters[1].getTypeName() + " was passed instead."); } @@ -172,7 +173,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { // Also make sure it is a constant. if (!ObjectInspectorUtils.isConstantObjectInspector(parameters[2])) { throw new UDFArgumentTypeException(2, - "The third argument must be a constant, but " + parameters[2].getTypeName() + + "The third argument must be a constant, but " + parameters[2].getTypeName() + " was passed instead."); } } @@ -184,7 +185,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { return new GenericUDAFSinglePercentileApproxEvaluator(); } } - + public static class GenericUDAFSinglePercentileApproxEvaluator extends GenericUDAFPercentileApproxEvaluator { @@ -234,7 +235,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { } } - + public static class GenericUDAFMultiplePercentileApproxEvaluator extends GenericUDAFPercentileApproxEvaluator { @@ -299,7 +300,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { @Override public void merge(AggregationBuffer agg, Object partial) throws HiveException { - if(partial == null) { + if(partial == null) { return; } PercentileAggBuf myagg = (PercentileAggBuf) agg; @@ -316,7 +317,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { } // merge histograms - myagg.histogram.merge(partialHistogram); + myagg.histogram.merge(partialHistogram); } @Override @@ -382,7 +383,7 @@ public class GenericUDAFPercentileApprox extends AbstractGenericUDAFResolver { } else { result = new double[1]; result[0] = PrimitiveObjectInspectorUtils.getDouble( - quantileObj, + quantileObj, (PrimitiveObjectInspector)quantileOI); } for(int ii = 0; ii < result.length; ++ii) { diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java index 2a1a617..5d0aa50 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStd.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * Compute the standard deviation by extending GenericUDAFVariance and * overriding the terminate() method of the evaluator. - * + * */ @Description(name = "std,stddev,stddev_pop", value = "_FUNC_(x) - Returns the standard deviation of a set of numbers") @@ -56,6 +56,7 @@ public class GenericUDAFStd extends GenericUDAFVariance { case DOUBLE: case STRING: case TIMESTAMP: + case DECIMAL: return new GenericUDAFStdEvaluator(); case BOOLEAN: default: @@ -68,7 +69,7 @@ public class GenericUDAFStd extends GenericUDAFVariance { /** * Compute the standard deviation by extending GenericUDAFVarianceEvaluator * and overriding the terminate() method of the evaluator. 
- * + */ public static class GenericUDAFStdEvaluator extends GenericUDAFVarianceEvaluator { diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStdSample.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStdSample.java index d5791ed..cde947c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStdSample.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFStdSample.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * Compute the sample standard deviation by extending GenericUDAFVariance and * overriding the terminate() method of the evaluator. - * + * */ @Description(name = "stddev_samp", value = "_FUNC_(x) - Returns the sample standard deviation of a set of numbers") @@ -55,6 +55,7 @@ public class GenericUDAFStdSample extends GenericUDAFVariance { case DOUBLE: case STRING: case TIMESTAMP: + case DECIMAL: return new GenericUDAFStdSampleEvaluator(); case BOOLEAN: default: diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java index 5a20f87..f80491d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFSum.java @@ -17,12 +17,15 @@ */ package org.apache.hadoop.hive.ql.udf.generic; +import java.math.BigDecimal; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -66,6 +69,8 @@ public class GenericUDAFSum extends AbstractGenericUDAFResolver { case DOUBLE: case STRING: return new GenericUDAFSumDouble(); + case DECIMAL: + return new GenericUDAFSumBigDecimal(); case BOOLEAN: default: throw new UDFArgumentTypeException(0, @@ -75,6 +80,89 @@ } /** + * GenericUDAFSumBigDecimal. + * + */ + public static class GenericUDAFSumBigDecimal extends GenericUDAFEvaluator { + private PrimitiveObjectInspector inputOI; + private BigDecimalWritable result; + + @Override + public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { + assert (parameters.length == 1); + super.init(m, parameters); + result = new BigDecimalWritable(BigDecimal.ZERO); + inputOI = (PrimitiveObjectInspector) parameters[0]; + return PrimitiveObjectInspectorFactory.writableBigDecimalObjectInspector; + } + + /** class for storing decimal sum value.
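+     * The running total is held as an exact java.math.BigDecimal, so unlike
+     * the double-based sum buffer no floating-point rounding error
+     * accumulates across additions.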
*/ + static class SumBigDecimalAgg implements AggregationBuffer { + boolean empty; + BigDecimal sum; + } + + @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + SumBigDecimalAgg agg = new SumBigDecimalAgg(); + reset(agg); + return agg; + } + + @Override + public void reset(AggregationBuffer agg) throws HiveException { + SumBigDecimalAgg bdAgg = (SumBigDecimalAgg) agg; + bdAgg.empty = true; + bdAgg.sum = BigDecimal.ZERO; + } + + boolean warned = false; + + @Override + public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { + assert (parameters.length == 1); + try { + merge(agg, parameters[0]); + } catch (NumberFormatException e) { + if (!warned) { + warned = true; + LOG.warn(getClass().getSimpleName() + " " + + StringUtils.stringifyException(e)); + LOG + .warn(getClass().getSimpleName() + + " ignoring similar exceptions."); + } + } + } + + @Override + public Object terminatePartial(AggregationBuffer agg) throws HiveException { + return terminate(agg); + } + + @Override + public void merge(AggregationBuffer agg, Object partial) throws HiveException { + if (partial != null) { + SumBigDecimalAgg myagg = (SumBigDecimalAgg) agg; + myagg.empty = false; + myagg.sum = myagg.sum.add( + PrimitiveObjectInspectorUtils.getBigDecimal(partial, inputOI)); + } + } + + @Override + public Object terminate(AggregationBuffer agg) throws HiveException { + SumBigDecimalAgg myagg = (SumBigDecimalAgg) agg; + if (myagg.empty) { + return null; + } + result.set(myagg.sum); + return result; + } + + } + + /** * GenericUDAFSumDouble. * */ diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java index 0b40d5c..7bba95c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVariance.java @@ -43,7 +43,7 @@ import org.apache.hadoop.util.StringUtils; /** * Compute the variance. This class is extended by: GenericUDAFVarianceSample * GenericUDAFStd GenericUDAFStdSample - * + * */ @Description(name = "variance,var_pop", value = "_FUNC_(x) - Returns the variance of a set of numbers") @@ -72,6 +72,7 @@ public class GenericUDAFVariance extends AbstractGenericUDAFResolver { case DOUBLE: case STRING: case TIMESTAMP: + case DECIMAL: return new GenericUDAFVarianceEvaluator(); case BOOLEAN: default: @@ -85,18 +86,18 @@ public class GenericUDAFVariance extends AbstractGenericUDAFResolver { * Evaluate the variance using the algorithm described by Chan, Golub, and LeVeque in * "Algorithms for computing the sample variance: analysis and recommendations" * The American Statistician, 37 (1983) pp. 242--247. - * + * * variance = variance1 + variance2 + n/(m*(m+n)) * pow(((m/n)*t1 - t2),2) - * + * * where: - variance is sum[x-avg^2] (this is actually n times the variance) * and is updated at every step. - n is the count of elements in chunk1 - m is - * the count of elements in chunk2 - t1 = sum of elements in chunk1, t2 = + * the count of elements in chunk2 - t1 = sum of elements in chunk1, t2 = * sum of elements in chunk2. * * This algorithm was proven to be numerically stable by J.L. Barlow in * "Error analysis of a pairwise summation algorithm to compute sample variance" * Numer. Math, 58 (1991) pp. 
583--590 - * + * */ public static class GenericUDAFVarianceEvaluator extends GenericUDAFEvaluator { diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVarianceSample.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVarianceSample.java index 65d860d..fa549e1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVarianceSample.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFVarianceSample.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; /** * Compute the sample variance by extending GenericUDAFVariance and overriding * the terminate() method of the evaluator. - * + * */ @Description(name = "var_samp", value = "_FUNC_(x) - Returns the sample variance of a set of numbers") @@ -56,6 +56,7 @@ public class GenericUDAFVarianceSample extends GenericUDAFVariance { case DOUBLE: case STRING: case TIMESTAMP: + case DECIMAL: return new GenericUDAFVarianceSampleEvaluator(); case BOOLEAN: default: diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDecimal.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDecimal.java new file mode 100644 index 0000000..d6776d1 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFToDecimal.java @@ -0,0 +1,74 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.udf.generic; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter.BigDecimalConverter; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; + +@Description(name = "decimal", value = "_FUNC_(a) - cast a to decimal") +public class GenericUDFToDecimal extends GenericUDF { + + private PrimitiveObjectInspector argumentOI; + private BigDecimalConverter bdConverter; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length < 1) { + throw new UDFArgumentLengthException( + "The function DECIMAL requires at least one argument, got " + + arguments.length); + } + try { + argumentOI = (PrimitiveObjectInspector) arguments[0]; + } catch (ClassCastException e) { + throw new UDFArgumentException( + "The function DECIMAL takes only primitive types"); + } + + bdConverter = new BigDecimalConverter(argumentOI, + PrimitiveObjectInspectorFactory.writableBigDecimalObjectInspector); + return PrimitiveObjectInspectorFactory.writableBigDecimalObjectInspector; + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + Object o0 = arguments[0].get(); + if (o0 == null) { + return null; + } + + return bdConverter.convert(o0); + } + + @Override + public String getDisplayString(String[] children) { + assert (children.length == 1); + StringBuilder sb = new StringBuilder(); + sb.append("CAST( "); + sb.append(children[0]); + sb.append(" AS DECIMAL)"); + return sb.toString(); + } + +} diff --git ql/src/test/queries/clientpositive/decimal_1.q ql/src/test/queries/clientpositive/decimal_1.q new file mode 100644 index 0000000..063afa8 --- /dev/null +++ ql/src/test/queries/clientpositive/decimal_1.q @@ -0,0 +1,17 @@ +drop table decimal_1; + +create table decimal_1 (t decimal); +alter table decimal_1 set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'; + +insert overwrite table decimal_1 + select cast('17.29' as decimal) from src limit 1; +select cast(t as boolean) from decimal_1 limit 1; +select cast(t as tinyint) from decimal_1 limit 1; +select cast(t as smallint) from decimal_1 limit 1; +select cast(t as int) from decimal_1 limit 1; +select cast(t as bigint) from decimal_1 limit 1; +select cast(t as float) from decimal_1 limit 1; +select cast(t as double) from decimal_1 limit 1; +select cast(t as string) from decimal_1 limit 1; + +drop table decimal_1; diff --git ql/src/test/queries/clientpositive/decimal_2.q ql/src/test/queries/clientpositive/decimal_2.q new file mode 100644 index 0000000..f2d7cb8 --- /dev/null +++ ql/src/test/queries/clientpositive/decimal_2.q @@ -0,0 +1,17 @@ +drop table decimal_2; + +create table decimal_2 (t decimal); +alter table decimal_2 set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe'; + +insert overwrite table decimal_2 + select cast('17.29' as decimal) from src limit 1; +select cast(t as boolean) from decimal_2 limit 1; +select cast(t as tinyint) from decimal_2 limit 1; +select cast(t as smallint) from decimal_2 limit 1; +select cast(t as int) from 
decimal_2 limit 1; +select cast(t as bigint) from decimal_2 limit 1; +select cast(t as float) from decimal_2 limit 1; +select cast(t as double) from decimal_2 limit 1; +select cast(t as string) from decimal_2 limit 1; + +drop table decimal_2; diff --git ql/src/test/queries/clientpositive/decimal_3.q ql/src/test/queries/clientpositive/decimal_3.q new file mode 100644 index 0000000..54e8383 --- /dev/null +++ ql/src/test/queries/clientpositive/decimal_3.q @@ -0,0 +1,24 @@ +CREATE TABLE DECIMAL_3(key decimal, value int) +ROW FORMAT DELIMITED + FIELDS TERMINATED BY ' ' +STORED AS TEXTFILE; + +LOAD DATA LOCAL INPATH '../data/files/kv7.txt' INTO TABLE DECIMAL_3; + +SELECT * FROM DECIMAL_3; + +SELECT * FROM DECIMAL_3 ORDER BY key; + +SELECT * FROM DECIMAL_3 ORDER BY key DESC; + +SELECT * FROM DECIMAL_3 ORDER BY (key, value); + +SELECT DISTINCT key FROM DECIMAL_3; + +SELECT key, sum(value) FROM DECIMAL_3 GROUP BY key ORDER BY key; + +SELECT value, sum(key) FROM DECIMAL_3 GROUP BY value; + +SELECT * FROM DECIMAL_3 a JOIN DECIMAL_3 b ON (a.key = b.key); + +DROP TABLE DECIMAL_3; diff --git ql/src/test/results/clientnegative/invalid_cast_from_binary_1.q.out ql/src/test/results/clientnegative/invalid_cast_from_binary_1.q.out index a3686d9..fdd013a 100644 --- ql/src/test/results/clientnegative/invalid_cast_from_binary_1.q.out +++ ql/src/test/results/clientnegative/invalid_cast_from_binary_1.q.out @@ -3,4 +3,4 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table tbl (a binary) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tbl -FAILED: SemanticException Line 0:-1 Wrong arguments 'a': No matching method for class org.apache.hadoop.hive.ql.udf.UDFToInteger with (binary). Possible choices: _FUNC_(void) _FUNC_(boolean) _FUNC_(tinyint) _FUNC_(smallint) _FUNC_(bigint) _FUNC_(float) _FUNC_(double) _FUNC_(string) _FUNC_(timestamp) +FAILED: SemanticException Line 0:-1 Wrong arguments 'a': No matching method for class org.apache.hadoop.hive.ql.udf.UDFToInteger with (binary). Possible choices: _FUNC_(void) _FUNC_(boolean) _FUNC_(tinyint) _FUNC_(smallint) _FUNC_(bigint) _FUNC_(float) _FUNC_(double) _FUNC_(string) _FUNC_(timestamp) _FUNC_(decimal) diff --git ql/src/test/results/clientnegative/invalid_cast_from_binary_2.q.out ql/src/test/results/clientnegative/invalid_cast_from_binary_2.q.out index 494b22c..f23283b 100644 --- ql/src/test/results/clientnegative/invalid_cast_from_binary_2.q.out +++ ql/src/test/results/clientnegative/invalid_cast_from_binary_2.q.out @@ -3,4 +3,4 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table tbl (a binary) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tbl -FAILED: SemanticException Line 0:-1 Wrong arguments 'a': No matching method for class org.apache.hadoop.hive.ql.udf.UDFToByte with (binary). Possible choices: _FUNC_(void) _FUNC_(boolean) _FUNC_(smallint) _FUNC_(int) _FUNC_(bigint) _FUNC_(float) _FUNC_(double) _FUNC_(string) _FUNC_(timestamp) +FAILED: SemanticException Line 0:-1 Wrong arguments 'a': No matching method for class org.apache.hadoop.hive.ql.udf.UDFToByte with (binary). 
Possible choices: _FUNC_(void) _FUNC_(boolean) _FUNC_(smallint) _FUNC_(int) _FUNC_(bigint) _FUNC_(float) _FUNC_(double) _FUNC_(string) _FUNC_(timestamp) _FUNC_(decimal) diff --git ql/src/test/results/clientnegative/invalid_cast_from_binary_3.q.out ql/src/test/results/clientnegative/invalid_cast_from_binary_3.q.out index 2428f3c..3cf06b3 100644 --- ql/src/test/results/clientnegative/invalid_cast_from_binary_3.q.out +++ ql/src/test/results/clientnegative/invalid_cast_from_binary_3.q.out @@ -3,4 +3,4 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table tbl (a binary) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tbl -FAILED: SemanticException Line 0:-1 Wrong arguments 'a': No matching method for class org.apache.hadoop.hive.ql.udf.UDFToShort with (binary). Possible choices: _FUNC_(void) _FUNC_(boolean) _FUNC_(tinyint) _FUNC_(int) _FUNC_(bigint) _FUNC_(float) _FUNC_(double) _FUNC_(string) _FUNC_(timestamp) +FAILED: SemanticException Line 0:-1 Wrong arguments 'a': No matching method for class org.apache.hadoop.hive.ql.udf.UDFToShort with (binary). Possible choices: _FUNC_(void) _FUNC_(boolean) _FUNC_(tinyint) _FUNC_(int) _FUNC_(bigint) _FUNC_(float) _FUNC_(double) _FUNC_(string) _FUNC_(timestamp) _FUNC_(decimal) diff --git ql/src/test/results/clientnegative/invalid_cast_from_binary_4.q.out ql/src/test/results/clientnegative/invalid_cast_from_binary_4.q.out index 25ec117..3789cc2 100644 --- ql/src/test/results/clientnegative/invalid_cast_from_binary_4.q.out +++ ql/src/test/results/clientnegative/invalid_cast_from_binary_4.q.out @@ -3,4 +3,4 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table tbl (a binary) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tbl -FAILED: SemanticException Line 0:-1 Wrong arguments 'a': No matching method for class org.apache.hadoop.hive.ql.udf.UDFToLong with (binary). Possible choices: _FUNC_(void) _FUNC_(boolean) _FUNC_(tinyint) _FUNC_(smallint) _FUNC_(int) _FUNC_(bigint) _FUNC_(float) _FUNC_(double) _FUNC_(string) _FUNC_(timestamp) +FAILED: SemanticException Line 0:-1 Wrong arguments 'a': No matching method for class org.apache.hadoop.hive.ql.udf.UDFToLong with (binary). Possible choices: _FUNC_(void) _FUNC_(boolean) _FUNC_(tinyint) _FUNC_(smallint) _FUNC_(int) _FUNC_(bigint) _FUNC_(float) _FUNC_(double) _FUNC_(string) _FUNC_(timestamp) _FUNC_(decimal) diff --git ql/src/test/results/clientnegative/invalid_cast_from_binary_5.q.out ql/src/test/results/clientnegative/invalid_cast_from_binary_5.q.out index 6152e47..55a776d 100644 --- ql/src/test/results/clientnegative/invalid_cast_from_binary_5.q.out +++ ql/src/test/results/clientnegative/invalid_cast_from_binary_5.q.out @@ -3,4 +3,4 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table tbl (a binary) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tbl -FAILED: SemanticException Line 0:-1 Wrong arguments 'a': No matching method for class org.apache.hadoop.hive.ql.udf.UDFToFloat with (binary). Possible choices: _FUNC_(void) _FUNC_(boolean) _FUNC_(tinyint) _FUNC_(smallint) _FUNC_(int) _FUNC_(bigint) _FUNC_(double) _FUNC_(string) _FUNC_(timestamp) +FAILED: SemanticException Line 0:-1 Wrong arguments 'a': No matching method for class org.apache.hadoop.hive.ql.udf.UDFToFloat with (binary). 
Possible choices: _FUNC_(void) _FUNC_(boolean) _FUNC_(tinyint) _FUNC_(smallint) _FUNC_(int) _FUNC_(bigint) _FUNC_(double) _FUNC_(string) _FUNC_(timestamp) _FUNC_(decimal) diff --git ql/src/test/results/clientnegative/invalid_cast_from_binary_6.q.out ql/src/test/results/clientnegative/invalid_cast_from_binary_6.q.out index 6eff980..afd92e7 100644 --- ql/src/test/results/clientnegative/invalid_cast_from_binary_6.q.out +++ ql/src/test/results/clientnegative/invalid_cast_from_binary_6.q.out @@ -3,4 +3,4 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: create table tbl (a binary) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tbl -FAILED: SemanticException Line 0:-1 Wrong arguments 'a': No matching method for class org.apache.hadoop.hive.ql.udf.UDFToDouble with (binary). Possible choices: _FUNC_(void) _FUNC_(boolean) _FUNC_(tinyint) _FUNC_(smallint) _FUNC_(int) _FUNC_(bigint) _FUNC_(float) _FUNC_(string) _FUNC_(timestamp) +FAILED: SemanticException Line 0:-1 Wrong arguments 'a': No matching method for class org.apache.hadoop.hive.ql.udf.UDFToDouble with (binary). Possible choices: _FUNC_(void) _FUNC_(boolean) _FUNC_(tinyint) _FUNC_(smallint) _FUNC_(int) _FUNC_(bigint) _FUNC_(float) _FUNC_(string) _FUNC_(timestamp) _FUNC_(decimal) diff --git ql/src/test/results/clientnegative/wrong_column_type.q.out ql/src/test/results/clientnegative/wrong_column_type.q.out index ab33949..d5dd66b 100644 --- ql/src/test/results/clientnegative/wrong_column_type.q.out +++ ql/src/test/results/clientnegative/wrong_column_type.q.out @@ -3,4 +3,4 @@ PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE dest1(a float) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest1 -FAILED: NoMatchingMethodException No matching method for class org.apache.hadoop.hive.ql.udf.UDFToFloat with (array). Possible choices: _FUNC_(void) _FUNC_(boolean) _FUNC_(tinyint) _FUNC_(smallint) _FUNC_(int) _FUNC_(bigint) _FUNC_(double) _FUNC_(string) _FUNC_(timestamp) +FAILED: NoMatchingMethodException No matching method for class org.apache.hadoop.hive.ql.udf.UDFToFloat with (array). 
Possible choices: _FUNC_(void) _FUNC_(boolean) _FUNC_(tinyint) _FUNC_(smallint) _FUNC_(int) _FUNC_(bigint) _FUNC_(double) _FUNC_(string) _FUNC_(timestamp) _FUNC_(decimal) diff --git ql/src/test/results/clientpositive/decimal_1.q.out ql/src/test/results/clientpositive/decimal_1.q.out new file mode 100644 index 0000000..4cf6e0a --- /dev/null +++ ql/src/test/results/clientpositive/decimal_1.q.out @@ -0,0 +1,117 @@ +PREHOOK: query: drop table decimal_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table decimal_1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table decimal_1 (t decimal) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table decimal_1 (t decimal) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@decimal_1 +PREHOOK: query: alter table decimal_1 set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@decimal_1 +PREHOOK: Output: default@decimal_1 +POSTHOOK: query: alter table decimal_1 set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@decimal_1 +POSTHOOK: Output: default@decimal_1 +PREHOOK: query: insert overwrite table decimal_1 + select cast('17.29' as decimal) from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@decimal_1 +POSTHOOK: query: insert overwrite table decimal_1 + select cast('17.29' as decimal) from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@decimal_1 +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +PREHOOK: query: select cast(t as boolean) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as boolean) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +true +PREHOOK: query: select cast(t as tinyint) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as tinyint) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as smallint) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as smallint) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as int) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as int) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as bigint) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as bigint) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as float) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: 
Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as float) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +17.29 +PREHOOK: query: select cast(t as double) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as double) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +17.29 +PREHOOK: query: select cast(t as string) from decimal_1 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as string) from decimal_1 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_1 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] +17.29 +PREHOOK: query: drop table decimal_1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_1 +PREHOOK: Output: default@decimal_1 +POSTHOOK: query: drop table decimal_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_1 +POSTHOOK: Output: default@decimal_1 +POSTHOOK: Lineage: decimal_1.t EXPRESSION [] diff --git ql/src/test/results/clientpositive/decimal_2.q.out ql/src/test/results/clientpositive/decimal_2.q.out new file mode 100644 index 0000000..284d9a5 --- /dev/null +++ ql/src/test/results/clientpositive/decimal_2.q.out @@ -0,0 +1,117 @@ +PREHOOK: query: drop table decimal_2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table decimal_2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table decimal_2 (t decimal) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table decimal_2 (t decimal) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@decimal_2 +PREHOOK: query: alter table decimal_2 set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@decimal_2 +PREHOOK: Output: default@decimal_2 +POSTHOOK: query: alter table decimal_2 set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@decimal_2 +POSTHOOK: Output: default@decimal_2 +PREHOOK: query: insert overwrite table decimal_2 + select cast('17.29' as decimal) from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@decimal_2 +POSTHOOK: query: insert overwrite table decimal_2 + select cast('17.29' as decimal) from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@decimal_2 +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +PREHOOK: query: select cast(t as boolean) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as boolean) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +true +PREHOOK: query: select cast(t as tinyint) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as tinyint) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +17 +PREHOOK: query: select 
cast(t as smallint) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as smallint) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as int) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as int) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as bigint) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as bigint) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +17 +PREHOOK: query: select cast(t as float) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as float) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +17.29 +PREHOOK: query: select cast(t as double) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as double) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +17.29 +PREHOOK: query: select cast(t as string) from decimal_2 limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: query: select cast(t as string) from decimal_2 limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_2 +#### A masked pattern was here #### +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] +17.29 +PREHOOK: query: drop table decimal_2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_2 +PREHOOK: Output: default@decimal_2 +POSTHOOK: query: drop table decimal_2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_2 +POSTHOOK: Output: default@decimal_2 +POSTHOOK: Lineage: decimal_2.t EXPRESSION [] diff --git ql/src/test/results/clientpositive/decimal_3.q.out ql/src/test/results/clientpositive/decimal_3.q.out new file mode 100644 index 0000000..a558c80 --- /dev/null +++ ql/src/test/results/clientpositive/decimal_3.q.out @@ -0,0 +1,351 @@ +PREHOOK: query: CREATE TABLE DECIMAL_3(key decimal, value int) +ROW FORMAT DELIMITED + FIELDS TERMINATED BY ' ' +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE DECIMAL_3(key decimal, value int) +ROW FORMAT DELIMITED + FIELDS TERMINATED BY ' ' +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@DECIMAL_3 +PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv7.txt' INTO TABLE DECIMAL_3 +PREHOOK: type: LOAD +PREHOOK: Output: default@decimal_3 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/kv7.txt' INTO TABLE DECIMAL_3 +POSTHOOK: type: LOAD +POSTHOOK: Output: default@decimal_3 +PREHOOK: query: SELECT * FROM DECIMAL_3 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_3 +#### A masked pattern was here #### 
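A note on the expected output below, illustrated by a small standalone
program (the class name DecimalKeyDemo is invented for this sketch and is not
part of the patch): decimal keys round-trip through a serializer that calls
BigDecimal.stripTrailingZeros(), which appears to be why 10, 100 and 200 come
back as 1E+1, 1E+2 and 2E+2 in the DISTINCT and GROUP BY listings, while
ordering, grouping and join matching behave like BigDecimal.compareTo(),
which is why 1 and 1.0 land in the same group even though
BigDecimal.equals() would keep them apart.

import java.math.BigDecimal;

public class DecimalKeyDemo {
  public static void main(String[] args) {
    // The scientific forms seen in the expected output:
    System.out.println(new BigDecimal("10").stripTrailingZeros());   // 1E+1
    System.out.println(new BigDecimal("100").stripTrailingZeros());  // 1E+2
    // Scale-insensitive comparison drives ORDER BY, GROUP BY and JOIN:
    System.out.println(new BigDecimal("1").compareTo(new BigDecimal("1.0"))); // 0
    // ...while equals() is scale-sensitive:
    System.out.println(new BigDecimal("1").equals(new BigDecimal("1.0")));    // false
  }
}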
+POSTHOOK: query: SELECT * FROM DECIMAL_3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +1E+99 0 +1E-99 0 +0 0 +100 100 +10 10 +1 1 +0.1 0 +0.01 0 +200 200 +20 20 +2 2 +0 0 +0.2 0 +0.02 0 +0.3 0 +0.33 0 +0.333 0 +-0.3 0 +-0.33 0 +-0.333 0 +1.0 1 +2 2 +3.14 3 +-1.12 -1 +-1.12 -1 +-1.122 -11 +1.12 1 +1.122 1 +124.00 124 +125.2 125 +-1255.49 -1255 +3.14 3 +3.14 3 +-1234567890.1234567890 -1234567890 +1234567890.1234567800 1234567890 +PREHOOK: query: SELECT * FROM DECIMAL_3 ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM DECIMAL_3 ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +-1234567890.1234567890 -1234567890 +-1255.49 -1255 +-1.122 -11 +-1.12 -1 +-1.12 -1 +-0.333 0 +-0.33 0 +-0.3 0 +0 0 +0 0 +1E-99 0 +0.01 0 +0.02 0 +0.1 0 +0.2 0 +0.3 0 +0.33 0 +0.333 0 +1 1 +1.0 1 +1.12 1 +1.122 1 +2 2 +2 2 +3.14 3 +3.14 3 +3.14 3 +10 10 +20 20 +100 100 +124.00 124 +125.2 125 +200 200 +1234567890.1234567800 1234567890 +1E+99 0 +PREHOOK: query: SELECT * FROM DECIMAL_3 ORDER BY key DESC +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM DECIMAL_3 ORDER BY key DESC +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +1E+99 0 +1234567890.1234567800 1234567890 +200 200 +125.2 125 +124.00 124 +100 100 +20 20 +10 10 +3.14 3 +3.14 3 +3.14 3 +2 2 +2 2 +1.122 1 +1.12 1 +1.0 1 +1 1 +0.333 0 +0.33 0 +0.3 0 +0.2 0 +0.1 0 +0.02 0 +0.01 0 +1E-99 0 +0 0 +0 0 +-0.3 0 +-0.33 0 +-0.333 0 +-1.12 -1 +-1.12 -1 +-1.122 -11 +-1255.49 -1255 +-1234567890.1234567890 -1234567890 +PREHOOK: query: SELECT * FROM DECIMAL_3 ORDER BY (key, value) +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM DECIMAL_3 ORDER BY (key, value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +-1234567890.1234567890 -1234567890 +-1255.49 -1255 +-1.122 -11 +-1.12 -1 +-1.12 -1 +-0.333 0 +-0.33 0 +-0.3 0 +0 0 +0 0 +1E-99 0 +0.01 0 +0.02 0 +0.1 0 +0.2 0 +0.3 0 +0.33 0 +0.333 0 +1 1 +1.0 1 +1.12 1 +1.122 1 +2 2 +2 2 +3.14 3 +3.14 3 +3.14 3 +10 10 +20 20 +100 100 +124.00 124 +125.2 125 +200 200 +1234567890.1234567800 1234567890 +1E+99 0 +PREHOOK: query: SELECT DISTINCT key FROM DECIMAL_3 +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DISTINCT key FROM DECIMAL_3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +-1234567890.123456789 +-1255.49 +-1.122 +-1.12 +-0.333 +-0.33 +-0.3 +0.00 +1E-99 +0.01 +0.02 +0.1 +0.2 +0.3 +0.33 +0.333 +1 +1.12 +1.122 +2 +3.14 +1E+1 +2E+1 +1E+2 +124 +125.2 +2E+2 +1234567890.12345678 +1E+99 +PREHOOK: query: SELECT key, sum(value) FROM DECIMAL_3 GROUP BY key ORDER BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, sum(value) FROM DECIMAL_3 GROUP BY key ORDER BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +-1234567890.123456789 -1234567890 +-1255.49 -1255 +-1.122 -11 +-1.12 -2 +-0.333 0 +-0.33 0 +-0.3 0 +0.00 0 +1E-99 0 +0.01 0 +0.02 0 +0.1 0 +0.2 0 +0.3 0 +0.33 0 +0.333 0 +1 2 +1.12 1 +1.122 1 +2 4 +3.14 9 +1E+1 10 +2E+1 20 +1E+2 100 +124 124 +125.2 
125 +2E+2 200 +1234567890.12345678 1234567890 +1E+99 0 +PREHOOK: query: SELECT value, sum(key) FROM DECIMAL_3 GROUP BY value +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT value, sum(key) FROM DECIMAL_3 GROUP BY value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +-1234567890 -1234567890.1234567890 +-1255 -1255.49 +-11 -1.122 +-1 -2.24 +0 1000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.330000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001 +1 4.242 +2 4 +3 9.42 +10 10 +20 20 +100 100 +124 124.00 +125 125.2 +200 200 +1234567890 1234567890.1234567800 +PREHOOK: query: SELECT * FROM DECIMAL_3 a JOIN DECIMAL_3 b ON (a.key = b.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM DECIMAL_3 a JOIN DECIMAL_3 b ON (a.key = b.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@decimal_3 +#### A masked pattern was here #### +-1234567890.1234567890 -1234567890 -1234567890.1234567890 -1234567890 +-1255.49 -1255 -1255.49 -1255 +-1.122 -11 -1.122 -11 +-1.12 -1 -1.12 -1 +-1.12 -1 -1.12 -1 +-1.12 -1 -1.12 -1 +-1.12 -1 -1.12 -1 +-0.333 0 -0.333 0 +-0.33 0 -0.33 0 +-0.3 0 -0.3 0 +0 0 0 0 +0 0 0 0 +0 0 0 0 +0 0 0 0 +1E-99 0 1E-99 0 +0.01 0 0.01 0 +0.02 0 0.02 0 +0.1 0 0.1 0 +0.2 0 0.2 0 +0.3 0 0.3 0 +0.33 0 0.33 0 +0.333 0 0.333 0 +1 1 1.0 1 +1 1 1 1 +1.0 1 1.0 1 +1.0 1 1 1 +1.12 1 1.12 1 +1.122 1 1.122 1 +2 2 2 2 +2 2 2 2 +2 2 2 2 +2 2 2 2 +3.14 3 3.14 3 +3.14 3 3.14 3 +3.14 3 3.14 3 +3.14 3 3.14 3 +3.14 3 3.14 3 +3.14 3 3.14 3 +3.14 3 3.14 3 +3.14 3 3.14 3 +3.14 3 3.14 3 +10 10 10 10 +20 20 20 20 +100 100 100 100 +124.00 124 124.00 124 +125.2 125 125.2 125 +200 200 200 200 +1234567890.1234567800 1234567890 1234567890.1234567800 1234567890 +1E+99 0 1E+99 0 +PREHOOK: query: DROP TABLE DECIMAL_3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@decimal_3 +PREHOOK: Output: default@decimal_3 +POSTHOOK: query: DROP TABLE DECIMAL_3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@decimal_3 +POSTHOOK: Output: default@decimal_3 diff --git ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out index bab3a82..2b621a1 100644 --- ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out +++ ql/src/test/results/clientpositive/ql_rewrite_gbtoidx.q.out @@ -1011,7 +1011,7 @@ STAGE PLANS: expr: _col4 type: int expr: ((_col5 - _col2) / _col2) - type: double + type: decimal outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/udf_round.q.out ql/src/test/results/clientpositive/udf_round.q.out index 900e91e..5279ec5 100644 --- ql/src/test/results/clientpositive/udf_round.q.out +++ ql/src/test/results/clientpositive/udf_round.q.out @@ -40,7 +40,7 @@ FROM src LIMIT 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -55555.0 55555.0 55555.0 55555.0 55555.0 55560.0 55600.0 56000.0 60000.0 100000.0 0.0 0.0 0.0 +55555 55555 55555.0 55555.00 55555.000 5.556E+4 5.56E+4 5.6E+4 6E+4 1E+5 0E+6 0E+7 0E+8 PREHOOK: query: SELECT round(125.315), round(125.315, 0), round(125.315, 1), round(125.315, 2), round(125.315, 3), round(125.315, 4), diff --git ql/src/test/results/clientpositive/udf_round_2.q.out ql/src/test/results/clientpositive/udf_round_2.q.out index 
5dbbd2b..747e5fc 100644 --- ql/src/test/results/clientpositive/udf_round_2.q.out +++ ql/src/test/results/clientpositive/udf_round_2.q.out @@ -57,4 +57,4 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### POSTHOOK: Lineage: tsttbl1.n EXPRESSION [] -Infinity Infinity Infinity Infinity +NULL NULL Infinity Infinity diff --git serde/if/serde.thrift serde/if/serde.thrift index e40c697..9847720 100644 --- serde/if/serde.thrift +++ serde/if/serde.thrift @@ -53,6 +53,7 @@ const string STRING_TYPE_NAME = "string"; const string DATE_TYPE_NAME = "date"; const string DATETIME_TYPE_NAME = "datetime"; const string TIMESTAMP_TYPE_NAME = "timestamp"; +const string DECIMAL_TYPE_NAME = "decimal"; const string BINARY_TYPE_NAME = "binary"; const string LIST_TYPE_NAME = "array"; @@ -63,7 +64,7 @@ const string UNION_TYPE_NAME = "uniontype"; const string LIST_COLUMNS = "columns"; const string LIST_COLUMN_TYPES = "columns.types"; -const set PrimitiveTypes = [ VOID_TYPE_NAME BOOLEAN_TYPE_NAME TINYINT_TYPE_NAME SMALLINT_TYPE_NAME INT_TYPE_NAME BIGINT_TYPE_NAME FLOAT_TYPE_NAME DOUBLE_TYPE_NAME STRING_TYPE_NAME DATE_TYPE_NAME DATETIME_TYPE_NAME TIMESTAMP_TYPE_NAME BINARY_TYPE_NAME], +const set PrimitiveTypes = [ VOID_TYPE_NAME BOOLEAN_TYPE_NAME TINYINT_TYPE_NAME SMALLINT_TYPE_NAME INT_TYPE_NAME BIGINT_TYPE_NAME FLOAT_TYPE_NAME DOUBLE_TYPE_NAME STRING_TYPE_NAME DATE_TYPE_NAME DATETIME_TYPE_NAME TIMESTAMP_TYPE_NAME DECIMAL_TYPE_NAME BINARY_TYPE_NAME], const set CollectionTypes = [ LIST_TYPE_NAME MAP_TYPE_NAME ], diff --git serde/src/gen/thrift/gen-cpp/serde_constants.cpp serde/src/gen/thrift/gen-cpp/serde_constants.cpp index 6259225..3997026 100644 --- serde/src/gen/thrift/gen-cpp/serde_constants.cpp +++ serde/src/gen/thrift/gen-cpp/serde_constants.cpp @@ -63,6 +63,8 @@ serdeConstants::serdeConstants() { TIMESTAMP_TYPE_NAME = "timestamp"; + DECIMAL_TYPE_NAME = "decimal"; + BINARY_TYPE_NAME = "binary"; LIST_TYPE_NAME = "array"; @@ -89,6 +91,7 @@ serdeConstants::serdeConstants() { PrimitiveTypes.insert("date"); PrimitiveTypes.insert("datetime"); PrimitiveTypes.insert("timestamp"); + PrimitiveTypes.insert("decimal"); PrimitiveTypes.insert("binary"); CollectionTypes.insert("array"); diff --git serde/src/gen/thrift/gen-cpp/serde_constants.h serde/src/gen/thrift/gen-cpp/serde_constants.h index cb9c3e4..0a63308 100644 --- serde/src/gen/thrift/gen-cpp/serde_constants.h +++ serde/src/gen/thrift/gen-cpp/serde_constants.h @@ -41,6 +41,7 @@ class serdeConstants { std::string DATE_TYPE_NAME; std::string DATETIME_TYPE_NAME; std::string TIMESTAMP_TYPE_NAME; + std::string DECIMAL_TYPE_NAME; std::string BINARY_TYPE_NAME; std::string LIST_TYPE_NAME; std::string MAP_TYPE_NAME; diff --git serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java index db39e6e..28f8d6a 100644 --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java @@ -85,6 +85,8 @@ public class serdeConstants { public static final String TIMESTAMP_TYPE_NAME = "timestamp"; + public static final String DECIMAL_TYPE_NAME = "decimal"; + public static final String BINARY_TYPE_NAME = "binary"; public static final String LIST_TYPE_NAME = "array"; @@ -113,6 +115,7 @@ public class serdeConstants { PrimitiveTypes.add("date"); PrimitiveTypes.add("datetime"); PrimitiveTypes.add("timestamp"); + PrimitiveTypes.add("decimal"); 
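+    // Mirrors the DECIMAL_TYPE_NAME entry added to serde.thrift above; every
+    // generated binding in this patch (C++, Java, PHP, Python, Ruby)
+    // registers the same "decimal" name, so validation that consults this
+    // set accepts it. Sketch of the check this enables:
+    //
+    //   serdeConstants.PrimitiveTypes.contains(serdeConstants.DECIMAL_TYPE_NAME)  // true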
PrimitiveTypes.add("binary"); } diff --git serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php index d95feef..130c17e 100644 --- serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php +++ serde/src/gen/thrift/gen-php/org/apache/hadoop/hive/serde/Types.php @@ -68,6 +68,8 @@ $GLOBALS['serde_CONSTANTS']['DATETIME_TYPE_NAME'] = "datetime"; $GLOBALS['serde_CONSTANTS']['TIMESTAMP_TYPE_NAME'] = "timestamp"; +$GLOBALS['serde_CONSTANTS']['DECIMAL_TYPE_NAME'] = "decimal"; + $GLOBALS['serde_CONSTANTS']['BINARY_TYPE_NAME'] = "binary"; $GLOBALS['serde_CONSTANTS']['LIST_TYPE_NAME'] = "array"; @@ -95,6 +97,7 @@ $GLOBALS['serde_CONSTANTS']['PrimitiveTypes'] = array( "date" => true, "datetime" => true, "timestamp" => true, + "decimal" => true, "binary" => true, ); diff --git serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py index 598db32..623bf0e 100644 --- serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py +++ serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py @@ -35,6 +35,7 @@ STRING_TYPE_NAME = "string" DATE_TYPE_NAME = "date" DATETIME_TYPE_NAME = "datetime" TIMESTAMP_TYPE_NAME = "timestamp" +DECIMAL_TYPE_NAME = "decimal" BINARY_TYPE_NAME = "binary" LIST_TYPE_NAME = "array" MAP_TYPE_NAME = "map" @@ -55,6 +56,7 @@ PrimitiveTypes = set([ "date", "datetime", "timestamp", + "decimal", "binary", ]) CollectionTypes = set([ diff --git serde/src/gen/thrift/gen-rb/serde_constants.rb serde/src/gen/thrift/gen-rb/serde_constants.rb index bfe4a3b..bd17761 100644 --- serde/src/gen/thrift/gen-rb/serde_constants.rb +++ serde/src/gen/thrift/gen-rb/serde_constants.rb @@ -59,6 +59,8 @@ DATETIME_TYPE_NAME = %q"datetime" TIMESTAMP_TYPE_NAME = %q"timestamp" +DECIMAL_TYPE_NAME = %q"decimal" + BINARY_TYPE_NAME = %q"binary" LIST_TYPE_NAME = %q"array" @@ -86,6 +88,7 @@ PrimitiveTypes = Set.new([ %q"date", %q"datetime", %q"timestamp", + %q"decimal", %q"binary", ]) diff --git serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java index e906a3f..4954b29 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspect import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BigDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; @@ -279,6 +280,10 @@ public final class SerDeUtils { sb.append(txt.toString()); break; } + case DECIMAL: { + sb.append(((BigDecimalObjectInspector) oi).getPrimitiveJavaObject(o)); + break; + } default: throw new RuntimeException("Unknown primitive type: " + poi.getPrimitiveCategory()); diff --git serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java index 450e063..b20c82a 100644 --- 
serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java @@ -19,6 +19,9 @@ package org.apache.hadoop.hive.serde2.binarysortable; import java.io.IOException; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -33,6 +36,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; +import org.apache.hadoop.hive.serde2.io.BigDecimalWritable; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; @@ -45,6 +49,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspecto import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.BigDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; @@ -106,6 +111,9 @@ public class BinarySortableSerDe implements SerDe { StructObjectInspector rowObjectInspector; boolean[] columnSortOrderIsDesc; + + private static byte[] decimalBuffer = null; + private static Charset decimalCharSet = Charset.forName("US-ASCII"); @Override public void initialize(Configuration conf, Properties tbl) @@ -370,6 +378,58 @@ public class BinarySortableSerDe implements SerDe { } t.setBinarySortable(bytes, 0); return t; + + case DECIMAL: { + BigDecimalWritable bdw = (reuse == null ? new BigDecimalWritable() : + (BigDecimalWritable) reuse); + + int b = buffer.read(invert) - 2; + assert (b == 1 || b == -1 || b == 0); + boolean positive = b != -1; + + int factor = buffer.read(invert) ^ 0x80; + for (int i = 0; i < 3; i++) { + factor = (factor << 8) + (buffer.read(invert) & 0xff); + } + + if (!positive) { + factor = -factor; + } + + int start = buffer.tell(); + int length = 0; + + do { + b = buffer.read(positive ? invert : !invert); + assert(b != 1); + + if (b == 0) { + // end of digits + break; + } + + length++; + } while (true); + + if(decimalBuffer == null || decimalBuffer.length < length) { + decimalBuffer = new byte[length]; + } + buffer.seek(start); + for (int i = 0; i < length; ++i) { + decimalBuffer[i] = buffer.read(positive ? 
+
+      String digits = new String(decimalBuffer, 0, length, decimalCharSet);
+      BigInteger bi = new BigInteger(digits);
+      BigDecimal bd = new BigDecimal(bi).scaleByPowerOfTen(factor - length);
+
+      if (!positive) {
+        bd = bd.negate();
+      }
+
+      bdw.set(bd);
+      return bdw;
+    }
 
     default: {
       throw new RuntimeException("Unrecognized type: "
@@ -377,6 +437,7 @@ public class BinarySortableSerDe implements SerDe {
       }
     }
   }
+
   case LIST: {
     ListTypeInfo ltype = (ListTypeInfo) type;
     TypeInfo etype = ltype.getListElementTypeInfo();
@@ -608,6 +669,35 @@ public class BinarySortableSerDe implements SerDe {
       }
       return;
     }
+    case DECIMAL: {
+      BigDecimalObjectInspector boi = (BigDecimalObjectInspector) poi;
+      BigDecimal dec = boi.getPrimitiveJavaObject(o).stripTrailingZeros();
+
+      // get the sign of the big decimal
+      int sign = dec.compareTo(BigDecimal.ZERO);
+
+      // we'll encode the absolute value (sign is separate)
+      dec = dec.abs();
+
+      // get the scale factor to turn big decimal into a decimal < 1
+      int factor = dec.precision() - dec.scale();
+      factor = sign == 1 ? factor : -factor;
+
+      // convert the absolute big decimal to string; the unscaled value
+      // already carries all the digits, so no rescaling is needed here
+      String digits = dec.unscaledValue().toString();
+
+      // finally write out the pieces (sign, factor, digits)
+      buffer.write((byte) ( sign + 2), invert);
+      buffer.write((byte) ((factor >> 24) ^ 0x80), invert);
+      buffer.write((byte) ( factor >> 16), invert);
+      buffer.write((byte) ( factor >> 8), invert);
+      buffer.write((byte)   factor, invert);
+      serializeBytes(buffer, digits.getBytes(decimalCharSet),
+          digits.length(), sign == -1 ? !invert : invert);
+
+      return;
+    }
+
     default: {
       throw new RuntimeException("Unrecognized type: "
           + poi.getPrimitiveCategory());
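The DECIMAL branches above encode a value as a sign byte (1, 2, or 3 for negative, zero, positive), a four-byte base-10 exponent ("factor") with its top bit flipped, and the ASCII digit string, inverting the digit bytes of negative values so larger magnitudes sort lower. A minimal standalone sketch of that layout — the class and method names are illustrative, and the escaping/terminator handled by serializeBytes is omitted:

import java.math.BigDecimal;
import java.nio.charset.Charset;

// Illustrative only: mirrors the (sign, factor, digits) layout so the
// resulting byte arrays compare in numeric order as unsigned bytes.
public class DecimalSortDemo {

  static byte[] encode(BigDecimal d) {
    BigDecimal dec = d.stripTrailingZeros();
    int sign = dec.compareTo(BigDecimal.ZERO);
    dec = dec.abs();
    // base-10 exponent: 12.3 -> 2, 0.0123 -> -1; negated for negative values
    int factor = dec.precision() - dec.scale();
    factor = sign == 1 ? factor : -factor;
    byte[] digits = dec.unscaledValue().toString()
        .getBytes(Charset.forName("US-ASCII"));
    byte[] out = new byte[5 + digits.length];
    out[0] = (byte) (sign + 2);               // 1, 2, 3: negative, zero, positive
    out[1] = (byte) ((factor >> 24) ^ 0x80);  // flipped top bit makes negative
    out[2] = (byte) (factor >> 16);           // exponents sort before positive
    out[3] = (byte) (factor >> 8);
    out[4] = (byte) factor;
    for (int i = 0; i < digits.length; i++) {
      // negative values invert digit bytes so bigger magnitudes sort lower
      out[5 + i] = sign == -1 ? (byte) ~digits[i] : digits[i];
    }
    return out;
  }

  public static void main(String[] args) {
    byte[] a = encode(new BigDecimal("2.9"));
    byte[] b = encode(new BigDecimal("12.3"));
    // the exponent field differs (1 vs 2), so 2.9 sorts before 12.3 even
    // though the digit strings would compare the other way around
    System.out.println((a[4] & 0xff) < (b[4] & 0xff));  // true
  }
}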
diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java
new file mode 100644
index 0000000..c2e1194
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/io/BigDecimalWritable.java
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.io;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableUtils;
+
+public class BigDecimalWritable implements WritableComparable<BigDecimalWritable> {
+
+  static final private Log LOG = LogFactory.getLog(BigDecimalWritable.class);
+
+  private byte[] internalStorage = new byte[0];
+  private int scale;
+
+  public BigDecimalWritable() {
+  }
+
+  public BigDecimalWritable(byte[] bytes, int scale) {
+    set(bytes, scale);
+  }
+
+  public BigDecimalWritable(BigDecimalWritable writable) {
+    set(writable.getBigDecimal());
+  }
+
+  public BigDecimalWritable(BigDecimal value) {
+    set(value);
+  }
+
+  public void set(BigDecimal value) {
+    set(value.unscaledValue().toByteArray(), value.scale());
+  }
+
+  public void set(BigDecimalWritable writable) {
+    set(writable.getBigDecimal());
+  }
+
+  public void set(byte[] bytes, int scale) {
+    this.internalStorage = bytes;
+    this.scale = scale;
+  }
+
+  private final VInt vInt = new VInt();
+
+  public void setFromBytes(byte[] bytes, int offset, int length) {
+    LazyBinaryUtils.readVInt(bytes, offset, vInt);
+    scale = vInt.value;
+    offset += vInt.length;
+    LazyBinaryUtils.readVInt(bytes, offset, vInt);
+    offset += vInt.length;
+    if (internalStorage.length != vInt.value) {
+      internalStorage = new byte[vInt.value];
+    }
+    System.arraycopy(bytes, offset, internalStorage, 0, vInt.value);
+  }
+
+  public BigDecimal getBigDecimal() {
+    return new BigDecimal(new BigInteger(internalStorage), scale);
+  }
+
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    scale = WritableUtils.readVInt(in);
+    int byteArrayLen = WritableUtils.readVInt(in);
+    if (internalStorage.length != byteArrayLen) {
+      internalStorage = new byte[byteArrayLen];
+    }
+    in.readFully(internalStorage);
+  }
+
+  @Override
+  public void write(DataOutput out) throws IOException {
+    WritableUtils.writeVInt(out, scale);
+    WritableUtils.writeVInt(out, internalStorage.length);
+    out.write(internalStorage);
+  }
+
+  @Override
+  public int compareTo(BigDecimalWritable that) {
+    return getBigDecimal().compareTo(that.getBigDecimal());
+  }
+
+  public void writeToByteStream(Output byteStream) {
+    LazyBinaryUtils.writeVInt(byteStream, scale);
+    LazyBinaryUtils.writeVInt(byteStream, internalStorage.length);
+    byteStream.write(internalStorage, 0, internalStorage.length);
+  }
+
+  @Override
+  public String toString() {
+    return getBigDecimal().toString();
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (other == null || !(other instanceof BigDecimalWritable)) {
+      return false;
+    }
+    BigDecimalWritable bdw = (BigDecimalWritable) other;
+    return getBigDecimal().equals(bdw.getBigDecimal());
+  }
+
+  @Override
+  public int hashCode() {
+    return getBigDecimal().hashCode();
+  }
+}
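On the wire a BigDecimalWritable is a vint scale, a vint byte count, and the two's-complement magnitude bytes from BigInteger.toByteArray(), in that order. A quick round-trip sketch using only the class above (the demo harness itself is hypothetical, not part of the patch):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.math.BigDecimal;
import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;

// Hypothetical round-trip check for the vint(scale) + vint(len) + bytes layout.
public class BigDecimalWritableRoundTrip {
  public static void main(String[] args) throws IOException {
    BigDecimal in = new BigDecimal("-1234567890.1234567890");
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    new BigDecimalWritable(in).write(new DataOutputStream(bos));

    BigDecimalWritable out = new BigDecimalWritable();
    out.readFields(new DataInputStream(new ByteArrayInputStream(bos.toByteArray())));
    // scale and unscaled value survive exactly; no rounding occurs
    System.out.println(in.equals(out.getBigDecimal()));  // true
  }
}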
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBigDecimal.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBigDecimal.java
new file mode 100644
index 0000000..4f4f1f3
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBigDecimal.java
@@ -0,0 +1,54 @@
+package org.apache.hadoop.hive.serde2.lazy;
+
+import java.math.BigDecimal;
+import java.nio.charset.CharacterCodingException;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBigDecimalObjectInspector;
+import org.apache.hadoop.io.Text;
+
+public class LazyBigDecimal extends
+    LazyPrimitive<LazyBigDecimalObjectInspector, BigDecimalWritable> {
+  static final private Log LOG = LogFactory.getLog(LazyBigDecimal.class);
+
+  public LazyBigDecimal(LazyBigDecimalObjectInspector oi) {
+    super(oi);
+    data = new BigDecimalWritable();
+  }
+
+  public LazyBigDecimal(LazyBigDecimal copy) {
+    super(copy);
+    data = new BigDecimalWritable(copy.data);
+  }
+
+  /**
+   * Initializes the LazyBigDecimal object by interpreting the input bytes
+   * as a numeric string.
+   *
+   * @param bytes
+   * @param start
+   * @param length
+   */
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    String byteData = null;
+    try {
+      byteData = Text.decode(bytes.getData(), start, length);
+      data.set(new BigDecimal(byteData));
+      isNull = false;
+    } catch (NumberFormatException e) {
+      isNull = true;
+      LOG.debug("Data not in the BigDecimal data type range so converted to null. Given data is: "
+          + byteData, e);
+    } catch (CharacterCodingException e) {
+      isNull = true;
+      LOG.debug("Data not in the BigDecimal data type range so converted to null.", e);
+    }
+  }
+
+  @Override
+  public BigDecimalWritable getWritableObject() {
+    return data;
+  }
+}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
index 20758a7..2c6251f 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
@@ -26,8 +26,9 @@ import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector;
-import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBigDecimalObjectInspector;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBooleanObjectInspector;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyByteObjectInspector;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyFloatObjectInspector;
@@ -112,6 +113,8 @@ public final class LazyFactory {
     return new LazyTimestamp((LazyTimestampObjectInspector) oi);
   case BINARY:
     return new LazyBinary((LazyBinaryObjectInspector) oi);
+  case DECIMAL:
+    return new LazyBigDecimal((LazyBigDecimalObjectInspector) oi);
   default:
     throw new RuntimeException("Internal error: no LazyObject for " + p);
   }
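LazyBigDecimal parses the column's bytes as a numeric string and degrades to NULL instead of failing the row when the text does not parse. A hypothetical demo of that behavior, assuming the ByteArrayRef setter and the inspector singleton defined later in this patch:

import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazy.LazyBigDecimal;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory;

// Hypothetical demo of LazyBigDecimal's parse-or-null behavior.
public class LazyBigDecimalDemo {
  public static void main(String[] args) {
    LazyBigDecimal lazy = new LazyBigDecimal(
        LazyPrimitiveObjectInspectorFactory.LAZY_BIG_DECIMAL_OBJECT_INSPECTOR);

    byte[] good = "3.14".getBytes();
    ByteArrayRef ref = new ByteArrayRef();
    ref.setData(good);
    lazy.init(ref, 0, good.length);
    System.out.println(lazy.getWritableObject());  // 3.14

    byte[] bad = "not-a-number".getBytes();
    ref.setData(bad);
    lazy.init(ref, 0, bad.length);
    // after a failed parse the lazy object is flagged null rather than throwing
  }
}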
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
index c96f5b4..b93709e 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.serde2.lazy;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.math.BigDecimal;
 import java.nio.ByteBuffer;
 import java.nio.charset.CharacterCodingException;
 import java.util.ArrayList;
@@ -32,6 +33,7 @@ import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BigDecimalObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
@@ -235,6 +237,12 @@ public final class LazyUtils {
           ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o));
       break;
     }
+    case DECIMAL: {
+      BigDecimal bd = ((BigDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
+      ByteBuffer b = Text.encode(bd.toString());
+      out.write(b.array(), 0, b.limit());
+      break;
+    }
     default: {
       throw new RuntimeException("Hive internal error.");
     }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyBigDecimalObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyBigDecimalObjectInspector.java
new file mode 100644
index 0000000..15eb0f6
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyBigDecimalObjectInspector.java
@@ -0,0 +1,28 @@
+package org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive;
+
+import java.math.BigDecimal;
+
+import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;
+import org.apache.hadoop.hive.serde2.lazy.LazyBigDecimal;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BigDecimalObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+
+public class LazyBigDecimalObjectInspector
+    extends AbstractPrimitiveLazyObjectInspector<BigDecimalWritable>
+    implements BigDecimalObjectInspector {
+
+  protected LazyBigDecimalObjectInspector() {
+    super(PrimitiveObjectInspectorUtils.decimalTypeEntry);
+  }
+
+  @Override
+  public Object copyObject(Object o) {
+    return o == null ? null : new LazyBigDecimal((LazyBigDecimal) o);
+  }
+
+  @Override
+  public BigDecimal getPrimitiveJavaObject(Object o) {
+    return o == null ? null : ((LazyBigDecimal) o).getWritableObject().getBigDecimal();
+  }
+
+}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
index 57d2fad..1e0ad00 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
@@ -57,6 +57,8 @@ public final class LazyPrimitiveObjectInspectorFactory {
       new LazyTimestampObjectInspector();
   public static final LazyBinaryObjectInspector LAZY_BINARY_OBJECT_INSPECTOR =
       new LazyBinaryObjectInspector();
+  public static final LazyBigDecimalObjectInspector LAZY_BIG_DECIMAL_OBJECT_INSPECTOR =
+      new LazyBigDecimalObjectInspector();
 
   static HashMap<ArrayList<Object>, LazyStringObjectInspector> cachedLazyStringObjectInspector =
       new HashMap<ArrayList<Object>, LazyStringObjectInspector>();
@@ -101,6 +103,8 @@ public final class LazyPrimitiveObjectInspectorFactory {
     return LAZY_VOID_OBJECT_INSPECTOR;
   case TIMESTAMP:
     return LAZY_TIMESTAMP_OBJECT_INSPECTOR;
+  case DECIMAL:
+    return LAZY_BIG_DECIMAL_OBJECT_INSPECTOR;
   default:
     throw new RuntimeException("Internal error: Cannot find ObjectInspector "
         + " for " + primitiveCategory);
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBigDecimal.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBigDecimal.java
new file mode 100644
index 0000000..5d8a6a1
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBigDecimal.java
@@ -0,0 +1,42 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBigDecimalObjectInspector;
+
+public class LazyBinaryBigDecimal extends
+    LazyBinaryPrimitive<WritableBigDecimalObjectInspector, BigDecimalWritable> {
+
+  LazyBinaryBigDecimal(WritableBigDecimalObjectInspector oi) {
+    super(oi);
+    data = new BigDecimalWritable();
+  }
+
+  LazyBinaryBigDecimal(LazyBinaryBigDecimal copy) {
+    super(copy);
+    data = new BigDecimalWritable(copy.data);
+  }
+
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    data.setFromBytes(bytes.getData(), start, length);
+  }
+
+}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
index 86f098f..3111cbc 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBigDecimalObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
@@ -75,6 +76,8 @@ public final class LazyBinaryFactory {
     return new LazyBinaryTimestamp((WritableTimestampObjectInspector) oi);
   case BINARY:
     return new LazyBinaryBinary((WritableBinaryObjectInspector) oi);
+  case DECIMAL:
+    return new LazyBinaryBigDecimal((WritableBigDecimalObjectInspector) oi);
   default:
     throw new RuntimeException("Internal error: no LazyBinaryObject for " + p);
   }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
index c640d6a..ec9c88a 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
@@ -42,6 +43,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BigDecimalObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
@@ -382,6 +384,14 @@ public class LazyBinarySerDe implements SerDe {
       t.writeToByteStream(byteStream);
       return warnedOnceNullMapKey;
     }
+
+    case DECIMAL: {
+      BigDecimalObjectInspector bdoi = (BigDecimalObjectInspector) poi;
+      BigDecimalWritable t = bdoi.getPrimitiveWritableObject(obj);
+      t.writeToByteStream(byteStream);
+      return warnedOnceNullMapKey;
+    }
+
     default: {
       throw new RuntimeException("Unrecognized type: "
           + poi.getPrimitiveCategory());
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
index e024cb9..cac20ff 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
@@ -203,6 +203,14 @@ public final class LazyBinaryUtils {
         recordInfo.elementSize += (byte) WritableUtils.decodeVIntSize(bytes[offset+4]);
       }
       break;
+    case DECIMAL:
+      // using vint instead of 4 bytes
+      LazyBinaryUtils.readVInt(bytes, offset, vInt);
+      recordInfo.elementOffset = 0;
+      recordInfo.elementSize = vInt.length;
+      LazyBinaryUtils.readVInt(bytes, offset + vInt.length, vInt);
+      recordInfo.elementSize += vInt.length + vInt.value;
+      break;
     default: {
       throw new RuntimeException("Unrecognized primitive type: "
           + primitiveCategory);
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
index 5392b0e..6070afb 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
@@ -25,6 +25,7 @@ import java.util.Map;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBigDecimalObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableBinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableByteObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.SettableDoubleObjectInspector;
@@ -118,6 +119,10 @@ public final class ObjectInspectorConverters {
       return new PrimitiveObjectInspectorConverter.BinaryConverter(
           (PrimitiveObjectInspector)inputOI,
           (SettableBinaryObjectInspector)outputOI);
+    case DECIMAL:
+      return new PrimitiveObjectInspectorConverter.BigDecimalConverter(
+          (PrimitiveObjectInspector) inputOI,
+          (SettableBigDecimalObjectInspector) outputOI);
 
     default:
       throw new RuntimeException("Hive internal error: conversion of "
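With the DECIMAL arm in getConverter, a string-to-decimal converter can be built like any other primitive conversion. A small usage sketch (the demo class is hypothetical; the two inspectors come from PrimitiveObjectInspectorFactory as extended elsewhere in this patch):

import java.math.BigDecimal;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

// Hypothetical use of the new DECIMAL branch: string -> decimal.
public class DecimalConverterDemo {
  public static void main(String[] args) {
    ObjectInspectorConverters.Converter c = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaBigDecimalObjectInspector);
    // internally this routes through PrimitiveObjectInspectorUtils.getBigDecimal
    BigDecimal d = (BigDecimal) c.convert("-1255.49");
    System.out.println(d);  // -1255.49
  }
}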
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
index 975255f..67132fc 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
@@ -30,10 +30,12 @@ import java.util.Map;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveWritableObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BigDecimalObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
@@ -490,6 +492,9 @@ public final class ObjectInspectorUtils {
       TimestampWritable t = ((TimestampObjectInspector) poi)
           .getPrimitiveWritableObject(o);
       return t.hashCode();
+    case DECIMAL:
+      return ((BigDecimalObjectInspector) poi).getPrimitiveWritableObject(o).hashCode();
+
     default: {
       throw new RuntimeException("Unknown type: "
           + poi.getPrimitiveCategory());
@@ -674,6 +679,13 @@ public final class ObjectInspectorUtils {
           .getPrimitiveWritableObject(o2);
       return t1.compareTo(t2);
     }
+    case DECIMAL: {
+      BigDecimalWritable t1 = ((BigDecimalObjectInspector) poi1)
+          .getPrimitiveWritableObject(o1);
+      BigDecimalWritable t2 = ((BigDecimalObjectInspector) poi2)
+          .getPrimitiveWritableObject(o2);
+      return t1.compareTo(t2);
+    }
     default: {
       throw new RuntimeException("Unknown type: "
           + poi1.getPrimitiveCategory());
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java
index 970e884..aace3bb 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/PrimitiveObjectInspector.java
@@ -27,7 +27,7 @@ public interface PrimitiveObjectInspector extends ObjectInspector {
    * The primitive types supported by Hive.
    */
   public static enum PrimitiveCategory {
-    VOID, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, TIMESTAMP, BINARY, UNKNOWN
+    VOID, BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, TIMESTAMP, BINARY, DECIMAL, UNKNOWN
   };
 
   /**
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/BigDecimalObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/BigDecimalObjectInspector.java
new file mode 100644
index 0000000..44db243
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/BigDecimalObjectInspector.java
@@ -0,0 +1,33 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.objectinspector.primitive;
+
+import java.math.BigDecimal;
+
+import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+/**
+ * A BigDecimalObjectInspector inspects an Object representing a BigDecimal.
+ */
+public interface BigDecimalObjectInspector extends PrimitiveObjectInspector {
+
+  BigDecimalWritable getPrimitiveWritableObject(Object o);
+
+  BigDecimal getPrimitiveJavaObject(Object o);
+}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaBigDecimalObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaBigDecimalObjectInspector.java
new file mode 100644
index 0000000..382d7e8
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaBigDecimalObjectInspector.java
@@ -0,0 +1,68 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.objectinspector.primitive;
+
+import java.math.BigDecimal;
+import java.math.BigInteger;
+
+import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;
+
+public class JavaBigDecimalObjectInspector
+    extends AbstractPrimitiveJavaObjectInspector
+    implements SettableBigDecimalObjectInspector {
+
+  protected JavaBigDecimalObjectInspector() {
+    super(PrimitiveObjectInspectorUtils.decimalTypeEntry);
+  }
+
+  @Override
+  public BigDecimalWritable getPrimitiveWritableObject(Object o) {
+    return o == null ? null : new BigDecimalWritable((BigDecimal) o);
+  }
+
+  @Override
+  public BigDecimal getPrimitiveJavaObject(Object o) {
+    return o == null ? null : (BigDecimal) o;
+  }
+
+  @Override
+  public Object set(Object o, byte[] bytes, int scale) {
+    return new BigDecimal(new BigInteger(bytes), scale);
+  }
+
+  @Override
+  public Object set(Object o, BigDecimal t) {
+    return t;
+  }
+
+  @Override
+  public Object set(Object o, BigDecimalWritable t) {
+    return t == null ? null : t.getBigDecimal();
+  }
+
+  @Override
+  public Object create(byte[] bytes, int scale) {
+    return new BigDecimal(new BigInteger(bytes), scale);
+  }
+
+  @Override
+  public Object create(BigDecimal t) {
+    return t == null ? null : new BigDecimal(t.unscaledValue(), t.scale());
+  }
+
+}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
index f0b16fa..d866a12 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorConverter.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.serde2.objectinspector.primitive;
 
+import java.math.BigDecimal;
 import java.sql.Timestamp;
 
 import org.apache.hadoop.hive.serde2.ByteStream;
@@ -257,6 +258,30 @@ public class PrimitiveObjectInspectorConverter {
     }
   }
 
+  public static class BigDecimalConverter implements Converter {
+
+    PrimitiveObjectInspector inputOI;
+    SettableBigDecimalObjectInspector outputOI;
+    Object r;
+
+    public BigDecimalConverter(PrimitiveObjectInspector inputOI,
+        SettableBigDecimalObjectInspector outputOI) {
+      this.inputOI = inputOI;
+      this.outputOI = outputOI;
+      this.r = outputOI.create(BigDecimal.ZERO);
+    }
+
+    @Override
+    public Object convert(Object input) {
+      if (input == null) {
+        return null;
+      }
+      return outputOI.set(r, PrimitiveObjectInspectorUtils.getBigDecimal(input,
+          inputOI));
+    }
+
+  }
+
   public static class BinaryConverter implements Converter {
 
     PrimitiveObjectInspector inputOI;
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
index 7f61344..a39934c 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorFactory.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.serde2.objectinspector.primitive;
 
 import java.util.HashMap;
 
+import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
@@ -69,6 +70,8 @@ public final class PrimitiveObjectInspectorFactory {
       new JavaTimestampObjectInspector();
   public static final JavaBinaryObjectInspector javaByteArrayObjectInspector =
       new JavaBinaryObjectInspector();
+  public static final JavaBigDecimalObjectInspector javaBigDecimalObjectInspector =
+      new JavaBigDecimalObjectInspector();
 
   public static final WritableBooleanObjectInspector writableBooleanObjectInspector =
       new WritableBooleanObjectInspector();
@@ -92,6 +95,8 @@ public final class PrimitiveObjectInspectorFactory {
       new WritableTimestampObjectInspector();
   public static final WritableBinaryObjectInspector writableBinaryObjectInspector =
       new WritableBinaryObjectInspector();
+  public static final WritableBigDecimalObjectInspector writableBigDecimalObjectInspector =
+      new WritableBigDecimalObjectInspector();
 
   private static HashMap<PrimitiveCategory, AbstractPrimitiveWritableObjectInspector> cachedPrimitiveWritableInspectorCache =
       new HashMap<PrimitiveCategory, AbstractPrimitiveWritableObjectInspector>();
@@ -118,6 +123,8 @@ public final class PrimitiveObjectInspectorFactory {
         writableTimestampObjectInspector);
     cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.BINARY,
         writableBinaryObjectInspector);
+    cachedPrimitiveWritableInspectorCache.put(PrimitiveCategory.DECIMAL,
+        writableBigDecimalObjectInspector);
   }
 
   private static HashMap<PrimitiveCategory, AbstractPrimitiveJavaObjectInspector> cachedPrimitiveJavaInspectorCache =
@@ -145,6 +152,8 @@ public final class PrimitiveObjectInspectorFactory {
         javaTimestampObjectInspector);
     cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.BINARY,
         javaByteArrayObjectInspector);
+    cachedPrimitiveJavaInspectorCache.put(PrimitiveCategory.DECIMAL,
+        javaBigDecimalObjectInspector);
   }
 
   /**
@@ -191,6 +200,8 @@ public final class PrimitiveObjectInspectorFactory {
     return new WritableConstantStringObjectInspector((Text)value);
   case TIMESTAMP:
     return new WritableConstantTimestampObjectInspector((TimestampWritable)value);
+  case DECIMAL:
+    return new WritableConstantBigDecimalObjectInspector((BigDecimalWritable)value);
   case BINARY:
     return new WritableConstantBinaryObjectInspector((BytesWritable)value);
   case VOID:
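A sketch of looking up the DECIMAL inspectors registered above. The constant-inspector call assumes the factory method getPrimitiveWritableConstantObjectInspector, whose switch the last hunk extends; that method name is inferred from context rather than shown in the hunk:

import java.math.BigDecimal;
import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

// Hypothetical lookup of the new DECIMAL inspectors.
public class DecimalFactoryDemo {
  public static void main(String[] args) {
    // writable- and java-side inspectors come from the two caches above
    System.out.println(PrimitiveObjectInspectorFactory
        .getPrimitiveWritableObjectInspector(PrimitiveCategory.DECIMAL)
        .getTypeName());  // decimal

    // a literal decimal in a query plan becomes a constant inspector
    ConstantObjectInspector coi = PrimitiveObjectInspectorFactory
        .getPrimitiveWritableConstantObjectInspector(PrimitiveCategory.DECIMAL,
            new BigDecimalWritable(new BigDecimal("3.14")));
    System.out.println(coi.getWritableConstantValue());  // 3.14
  }
}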
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
index fc71ae1..54268d9 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java
@@ -21,11 +21,13 @@ package org.apache.hadoop.hive.serde2.objectinspector.primitive;
 
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.math.BigDecimal;
 import java.sql.Timestamp;
 import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
@@ -178,6 +180,9 @@ public final class PrimitiveObjectInspectorUtils {
   public static final PrimitiveTypeEntry timestampTypeEntry = new PrimitiveTypeEntry(
       PrimitiveCategory.TIMESTAMP, serdeConstants.TIMESTAMP_TYPE_NAME, null,
       Object.class, TimestampWritable.class);
+  public static final PrimitiveTypeEntry decimalTypeEntry = new PrimitiveTypeEntry(
+      PrimitiveCategory.DECIMAL, serdeConstants.DECIMAL_TYPE_NAME, null,
+      Object.class, BigDecimalWritable.class);
 
   // The following is a complex type for special handling
   public static final PrimitiveTypeEntry unknownTypeEntry = new PrimitiveTypeEntry(
@@ -195,6 +200,7 @@ public final class PrimitiveObjectInspectorUtils {
     registerType(byteTypeEntry);
     registerType(shortTypeEntry);
     registerType(timestampTypeEntry);
+    registerType(decimalTypeEntry);
     registerType(unknownTypeEntry);
   }
 
@@ -362,6 +368,10 @@ public final class PrimitiveObjectInspectorUtils {
       return ((BinaryObjectInspector) oi1).getPrimitiveWritableObject(o1).
           equals(((BinaryObjectInspector) oi2).getPrimitiveWritableObject(o2));
     }
+    case DECIMAL: {
+      return ((BigDecimalObjectInspector) oi1).getPrimitiveJavaObject(o1)
+          .equals(((BigDecimalObjectInspector) oi2).getPrimitiveJavaObject(o2));
+    }
     default:
       return false;
   }
@@ -391,6 +401,8 @@ public final class PrimitiveObjectInspectorUtils {
   case TIMESTAMP:
     return ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o)
         .getDouble();
+  case DECIMAL:
+    return ((BigDecimalObjectInspector) oi).getPrimitiveJavaObject(o).doubleValue();
   default:
     throw new NumberFormatException();
   }
@@ -465,6 +477,10 @@ public final class PrimitiveObjectInspectorUtils {
       result = (((TimestampObjectInspector) oi)
           .getPrimitiveWritableObject(o).getSeconds() != 0);
       break;
+    case DECIMAL:
+      result = (((BigDecimalObjectInspector) oi)
+          .getPrimitiveJavaObject(o).compareTo(BigDecimal.ZERO) != 0);
+      break;
     default:
       throw new RuntimeException("Hive 2 Internal error: unknown type: "
           + oi.getTypeName());
@@ -545,6 +561,10 @@ public final class PrimitiveObjectInspectorUtils {
       result = (int) (((TimestampObjectInspector) oi)
           .getPrimitiveWritableObject(o).getSeconds());
       break;
+    case DECIMAL:
+      result = ((BigDecimalObjectInspector) oi)
+          .getPrimitiveJavaObject(o).intValue();
+      break;
     default: {
       throw new RuntimeException("Hive 2 Internal error: unknown type: "
           + oi.getTypeName());
@@ -599,6 +619,10 @@ public final class PrimitiveObjectInspectorUtils {
     result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o)
         .getSeconds();
     break;
+  case DECIMAL:
+    result = ((BigDecimalObjectInspector) oi)
+        .getPrimitiveJavaObject(o).longValue();
+    break;
   default:
     throw new RuntimeException("Hive 2 Internal error: unknown type: "
         + oi.getTypeName());
@@ -646,6 +670,10 @@ public final class PrimitiveObjectInspectorUtils {
   case TIMESTAMP:
     result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o).getDouble();
     break;
+  case DECIMAL:
+    result = ((BigDecimalObjectInspector) oi)
+        .getPrimitiveJavaObject(o).doubleValue();
+    break;
   default:
     throw new RuntimeException("Hive 2 Internal error: unknown type: "
         + oi.getTypeName());
@@ -706,6 +734,10 @@ public final class PrimitiveObjectInspectorUtils {
   case TIMESTAMP:
     result = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o).toString();
     break;
+  case DECIMAL:
+    result = ((BigDecimalObjectInspector) oi)
+        .getPrimitiveJavaObject(o).toString();
+    break;
   default:
     throw new RuntimeException("Hive 2 Internal error: unknown type: "
         + oi.getTypeName());
@@ -739,6 +771,55 @@ public final class PrimitiveObjectInspectorUtils {
     }
   }
 
+  public static BigDecimal getBigDecimal(Object o, PrimitiveObjectInspector oi) {
+    if (o == null) {
+      return null;
+    }
+
+    BigDecimal result = null;
+    switch (oi.getPrimitiveCategory()) {
+    case VOID:
+      result = null;
+      break;
+    case BOOLEAN:
+      result = ((BooleanObjectInspector) oi).get(o) ?
+          BigDecimal.ONE : BigDecimal.ZERO;
+      break;
+    case BYTE:
+      result = new BigDecimal(((ByteObjectInspector) oi).get(o));
+      break;
+    case SHORT:
+      result = new BigDecimal(((ShortObjectInspector) oi).get(o));
+      break;
+    case INT:
+      result = new BigDecimal(((IntObjectInspector) oi).get(o));
+      break;
+    case LONG:
+      result = new BigDecimal(((LongObjectInspector) oi).get(o));
+      break;
+    case FLOAT:
+      result = new BigDecimal(((FloatObjectInspector) oi).get(o));
+      break;
+    case DOUBLE:
+      result = new BigDecimal(((DoubleObjectInspector) oi).get(o));
+      break;
+    case STRING:
+      result = new BigDecimal(((StringObjectInspector) oi).getPrimitiveJavaObject(o));
+      break;
+    case TIMESTAMP:
+      result = new BigDecimal(((TimestampObjectInspector) oi).getPrimitiveWritableObject(o)
+          .getDouble());
+      break;
+    case DECIMAL:
+      result = ((BigDecimalObjectInspector) oi).getPrimitiveJavaObject(o);
+      break;
+    default:
+      throw new RuntimeException("Hive 2 Internal error: unknown type: "
+          + oi.getTypeName());
+    }
+    return result;
+  }
+
   public static Timestamp getTimestamp(Object o, PrimitiveObjectInspector oi) {
     if (o == null) {
       return null;
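getBigDecimal widens from every other primitive category; note that FLOAT and DOUBLE go through new BigDecimal(double), so binary rounding artifacts surface in the result rather than being hidden. A hypothetical demo under that reading:

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;

// Hypothetical demo of the widening coercions handled by getBigDecimal.
public class GetBigDecimalDemo {
  public static void main(String[] args) {
    // strings parse via the BigDecimal(String) constructor
    System.out.println(PrimitiveObjectInspectorUtils.getBigDecimal(
        "1E+99", PrimitiveObjectInspectorFactory.javaStringObjectInspector));

    // integral types convert exactly
    System.out.println(PrimitiveObjectInspectorUtils.getBigDecimal(
        Long.valueOf(1234567890L),
        PrimitiveObjectInspectorFactory.javaLongObjectInspector));

    // doubles expose the binary representation: 0.1 prints as
    // 0.1000000000000000055511151231257827021181583404541015625
    System.out.println(PrimitiveObjectInspectorUtils.getBigDecimal(
        Double.valueOf(0.1),
        PrimitiveObjectInspectorFactory.javaDoubleObjectInspector));
  }
}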
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/SettableBigDecimalObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/SettableBigDecimalObjectInspector.java
new file mode 100644
index 0000000..ff262b2
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/SettableBigDecimalObjectInspector.java
@@ -0,0 +1,39 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.objectinspector.primitive;
+
+import java.math.BigDecimal;
+
+import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;
+
+/**
+ * A SettableBigDecimalObjectInspector can set a BigDecimal value to an object.
+ */
+public interface SettableBigDecimalObjectInspector extends BigDecimalObjectInspector {
+
+  Object set(Object o, byte[] bytes, int scale);
+
+  Object set(Object o, BigDecimal t);
+
+  Object set(Object o, BigDecimalWritable t);
+
+  Object create(byte[] bytes, int scale);
+
+  Object create(BigDecimal t);
+
+}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableBigDecimalObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableBigDecimalObjectInspector.java
new file mode 100644
index 0000000..88184cf
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableBigDecimalObjectInspector.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.objectinspector.primitive;
+
+import java.math.BigDecimal;
+
+import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;
+
+public class WritableBigDecimalObjectInspector
+    extends AbstractPrimitiveWritableObjectInspector
+    implements SettableBigDecimalObjectInspector {
+
+  protected WritableBigDecimalObjectInspector() {
+    super(PrimitiveObjectInspectorUtils.decimalTypeEntry);
+  }
+
+  @Override
+  public BigDecimalWritable getPrimitiveWritableObject(Object o) {
+    return o == null ? null : (BigDecimalWritable) o;
+  }
+
+  @Override
+  public BigDecimal getPrimitiveJavaObject(Object o) {
+    return o == null ? null : ((BigDecimalWritable) o).getBigDecimal();
+  }
+
+  @Override
+  public Object copyObject(Object o) {
+    return o == null ? null : new BigDecimalWritable((BigDecimalWritable) o);
+  }
+
+  @Override
+  public Object set(Object o, byte[] bytes, int scale) {
+    ((BigDecimalWritable) o).set(bytes, scale);
+    return o;
+  }
+
+  @Override
+  public Object set(Object o, BigDecimal t) {
+    ((BigDecimalWritable) o).set(t);
+    return o;
+  }
+
+  @Override
+  public Object set(Object o, BigDecimalWritable t) {
+    ((BigDecimalWritable) o).set(t);
+    return o;
+  }
+
+  @Override
+  public Object create(byte[] bytes, int scale) {
+    return new BigDecimalWritable(bytes, scale);
+  }
+
+  @Override
+  public Object create(BigDecimal t) {
+    return new BigDecimalWritable(t);
+  }
+
+}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantBigDecimalObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantBigDecimalObjectInspector.java
new file mode 100644
index 0000000..672b106
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableConstantBigDecimalObjectInspector.java
@@ -0,0 +1,40 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.objectinspector.primitive;
+
+import org.apache.hadoop.hive.serde2.io.BigDecimalWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector;
+
+/**
+ * A WritableConstantBigDecimalObjectInspector is a WritableBigDecimalObjectInspector
+ * that implements ConstantObjectInspector.
+ */
+public class WritableConstantBigDecimalObjectInspector extends WritableBigDecimalObjectInspector
+    implements ConstantObjectInspector {
+
+  private final BigDecimalWritable value;
+
+  WritableConstantBigDecimalObjectInspector(BigDecimalWritable value) {
+    this.value = value;
+  }
+
+  @Override
+  public BigDecimalWritable getWritableConstantValue() {
+    return value;
+  }
+}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
index 4f9fa75..b7ef987 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java
@@ -64,6 +64,7 @@ public final class TypeInfoFactory {
   public static final TypeInfo shortTypeInfo = getPrimitiveTypeInfo(serdeConstants.SMALLINT_TYPE_NAME);
   public static final TypeInfo timestampTypeInfo = getPrimitiveTypeInfo(serdeConstants.TIMESTAMP_TYPE_NAME);
   public static final TypeInfo binaryTypeInfo = getPrimitiveTypeInfo(serdeConstants.BINARY_TYPE_NAME);
+  public static final TypeInfo decimalTypeInfo = getPrimitiveTypeInfo(serdeConstants.DECIMAL_TYPE_NAME);
 
   public static final TypeInfo unknownTypeInfo = getPrimitiveTypeInfo("unknown");
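With decimalTypeInfo registered, the type name resolves through the TypeInfo layer like any other primitive. A minimal check (assumes TypeInfoUtils from the same package; the demo class is hypothetical):

import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

// Hypothetical check that "decimal" resolves through the TypeInfo layer.
public class DecimalTypeInfoDemo {
  public static void main(String[] args) {
    System.out.println(TypeInfoFactory.decimalTypeInfo.getTypeName());  // decimal
    System.out.println(
        TypeInfoUtils.getTypeInfoFromTypeString("decimal").getTypeName());  // decimal
  }
}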