diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreUtils.java index a66c13507abef42977dfdb315ff7d69404f67ac3..3df02f3ea7e298e185d57d8a19ee5bcf1065dcca 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreUtils.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreUtils.java @@ -207,7 +207,7 @@ private static String determineFieldComment(String comment) { public static FieldSchema getFieldSchemaFromTypeInfo(String fieldName, TypeInfo typeInfo) { return new FieldSchema(fieldName, typeInfo.getTypeName(), - "generated by TypeInfoUtils.getFieldSchemaFromTypeInfo"); + "generated by StorageSchemaUtils.getFieldSchemaFromTypeInfo"); } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/SerDeStorageSchemaReader.java b/metastore/src/java/org/apache/hadoop/hive/metastore/SerDeStorageSchemaReader.java index 59bcd5ca34d5083d357d7157abf3682399060a1a..05e10784a4ac075a458db30976d047447b12d7a0 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/SerDeStorageSchemaReader.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/SerDeStorageSchemaReader.java @@ -27,6 +27,10 @@ import java.util.List; +/** + * To use this storage schema reader, the hive-serde jar must be added to the classpath + * of the metastore. + */ public class SerDeStorageSchemaReader implements StorageSchemaReader { @Override public List<FieldSchema> readSchema(Table tbl, EnvironmentContext envContext, Configuration conf) diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/AvroStorageSchemaReader.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/AvroStorageSchemaReader.java new file mode 100644 index 0000000000000000000000000000000000000000..9fd773d13d00ca072a91eccd029e6daaaf283271 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/AvroStorageSchemaReader.java @@ -0,0 +1,35 @@ +package org.apache.hadoop.hive.metastore; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.EnvironmentContext; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.utils.AvroSchemaUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.List; +import java.util.Properties; + +public class AvroStorageSchemaReader implements StorageSchemaReader { + private static final Logger LOG = LoggerFactory.getLogger(AvroStorageSchemaReader.class); + + @Override + public List<FieldSchema> readSchema(Table tbl, EnvironmentContext envContext, + Configuration conf) throws MetaException { + Properties tblMetadataProperties = MetaStoreUtils.getTableMetadata(tbl); + try { + return AvroSchemaUtils.getFieldsFromAvroSchema(conf, tblMetadataProperties); + } catch (AvroSerdeException e) { + LOG.warn("Exception received while reading avro schema for table " + tbl.getTableName(), e); + throw new MetaException(e.getMessage()); + } catch (IOException e) { + LOG.warn("Received IOException while reading avro schema for table " + tbl.getTableName(), e); + throw new MetaException(e.getMessage()); + } + } +} \ No newline at end of file diff --git 
a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java index d5dea4dc3ca6a83a863326e1c75e2480898d00af..23d77ebdbdcb83699e8b6cc030fbddf4bcaecbb3 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ColumnType.java @@ -72,6 +72,9 @@ public static final String TIMESTAMPTZ_TYPE_NAME = "timestamp with time zone"; + //TODO:HIVE-17580 seems like this type is missing here? + public static final String TIMESTAMPLOCALTZ_TYPE_NAME = "timestamp with local time zone"; + public static final String LIST_TYPE_NAME = "array"; public static final String MAP_TYPE_NAME = "map"; @@ -86,6 +89,32 @@ public static final String COLUMN_NAME_DELIMITER = "column.name.delimiter"; + // used for validation of the params while parsing the types using MetastoreTypeInfoParser + public static final int MAX_VARCHAR_LENGTH = 65535; + public static final int MAX_CHAR_LENGTH = 255; + + /** + * Default precision/scale when system is not able to determine them, such as in case + * of a non-generic udf. + */ + //HiveDecimalV1 + public static final int SYSTEM_DEFAULT_PRECISION = 38; + public static final int SYSTEM_DEFAULT_SCALE = 18; + + /** + * Default precision/scale when user doesn't specify in the column metadata, such as + * decimal and decimal(8). + */ + //HiveDecimalVersionV1 + public static final int USER_DEFAULT_PRECISION = 10; + //HiveDecimalVersionV1 + public static final int USER_DEFAULT_SCALE = 0; + + //HiveDecimalVersionV1 + public static final int MAX_PRECISION = 38; + //HiveDecimalVersionV1 + public static final int MAX_SCALE = 38; + public static final Set<String> PrimitiveTypes = StringUtils.asSet( VOID_TYPE_NAME, BOOLEAN_TYPE_NAME, @@ -185,6 +214,7 @@ MAP_TYPE_NAME, STRUCT_TYPE_NAME, UNION_TYPE_NAME, + //TODO:HIVE-17580 : Why are these in allTypes? This will probably mess up MetastoreTypeInfoFactory create method LIST_COLUMNS, LIST_COLUMN_TYPES, COLUMN_NAME_DELIMITER @@ -224,7 +254,7 @@ public static boolean areColTypesCompatible(String from, String to) { if (from.equals(VOID_TYPE_NAME)) return true; // Allow date to string casts. NOTE: I suspect this is the reverse of what we actually - // want, but it matches the code in o.a.h.h.serde2.typeinfo.TypeInfoUtils. I can't see how + // want, but it matches the code in o.a.h.h.serde2.typeinfo.StorageSchemaUtils. I can't see how // users would be altering date columns into string columns. The other I easily see since // Hive did not originally support datetime types. Also, the comment in the Hive code // says string to date, even though the code does the opposite. 
But for now I'm keeping @@ -248,6 +278,8 @@ public static boolean areColTypesCompatible(String from, String to) { public static final char COLUMN_COMMENTS_DELIMITER = '\0'; + public static final String LIST_COLUMN_COMMENTS = "columns.comments"; + private static HashMap<String, String> typeToThriftTypeMap; static { typeToThriftTypeMap = new HashMap<>(); diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/DefaultStorageSchemaReader.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/DefaultStorageSchemaReader.java index 1dbfa4272cd5368242d335fbde564d35cac26bba..44bcee1de002d963d66abbb69ac65ecf5658ca33 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/DefaultStorageSchemaReader.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/DefaultStorageSchemaReader.java @@ -18,21 +18,131 @@ package org.apache.hadoop.hive.metastore; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.ColumnType; import org.apache.hadoop.hive.metastore.api.EnvironmentContext; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.type.MetastoreTypeInfo; +import org.apache.hadoop.hive.metastore.utils.AvroSchemaUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.utils.StorageSchemaUtils; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.Properties; + +import static org.apache.hadoop.hive.metastore.ColumnType.LIST_COLUMN_COMMENTS; /** * Default StorageSchemaReader. This just throws as the metastore currently doesn't know how to * read schemas from storage. 
*/ public class DefaultStorageSchemaReader implements StorageSchemaReader { + private static final Logger LOG = LoggerFactory.getLogger(DefaultStorageSchemaReader.class); + + private static final String AVRO_SERIALIZATION_LIB = + "org.apache.hadoop.hive.serde2.avro.AvroSerDe"; + @Override public List<FieldSchema> readSchema(Table tbl, EnvironmentContext envContext, - Configuration conf) throws MetaException { - throw new UnsupportedOperationException("Storage schema reading not supported"); + Configuration conf) throws MetaException { + String serializationLib = tbl.getSd().getSerdeInfo().getSerializationLib(); + if (null == serializationLib || MetastoreConf + .getStringCollection(conf, MetastoreConf.ConfVars.SERDES_USING_METASTORE_FOR_SCHEMA) + .contains(serializationLib)) { + //safety check to make sure we should be using storage schema reader for this table + throw new MetaException( + "Invalid usage of default storage schema reader for table " + tbl.getTableName() + + " with serialization lib " + tbl.getSd().getSerdeInfo().getSerializationLib()); + } + Properties tblMetadataProperties = MetaStoreUtils.getTableMetadata(tbl); + if (AVRO_SERIALIZATION_LIB.equals(serializationLib)) { + // in case of an Avro table, use the AvroSchemaUtils helpers + try { + return AvroSchemaUtils.getFieldsFromAvroSchema(conf, tblMetadataProperties); + } catch (AvroSerdeException e) { + LOG.warn("Exception received while reading avro schema for table " + tbl.getTableName(), e); + throw new MetaException(e.getMessage()); + } catch (IOException e) { + LOG.warn("Exception received while reading avro schema for table " + tbl.getTableName(), e); + throw new MetaException(e.getMessage()); + } + } else { + return getFieldSchemasFromTableMetadata(tblMetadataProperties); + } + } + + /** + * This method implements a generic way to get the FieldSchemas from the table metadata + * properties like column names and column types. Most of the serdes have the same implementation + * in their initialize method. + * TODO: refactor the common code from the serdes and move it to serde-api so that there is no + * duplicate code. + * + * @return list of FieldSchema objects + */ + public static List<FieldSchema> getFieldSchemasFromTableMetadata( + Properties tblMetadataProperties) { + List<String> columnNames = null; + List<MetastoreTypeInfo> columnTypes = null; + // Get column names and types + String columnNameProperty = tblMetadataProperties.getProperty(ColumnType.LIST_COLUMNS); + String columnTypeProperty = tblMetadataProperties.getProperty(ColumnType.LIST_COLUMN_TYPES); + final String columnNameDelimiter = tblMetadataProperties + .containsKey(ColumnType.COLUMN_NAME_DELIMITER) ? 
tblMetadataProperties + .getProperty( ColumnType.COLUMN_NAME_DELIMITER) : String + .valueOf(StorageSchemaUtils.COMMA); + // all table column names + if (columnNameProperty.isEmpty()) { + columnNames = Collections.emptyList(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter)); + } + + // all column types + if (columnTypeProperty.isEmpty()) { + columnTypes = Collections.emptyList(); + } else { + columnTypes = StorageSchemaUtils.getTypeInfosFromTypeString(columnTypeProperty); + } + + final String columnCommentProperty = + tblMetadataProperties.getProperty(LIST_COLUMN_COMMENTS, ""); + List columnComments = null; + if (columnCommentProperty == null || columnCommentProperty.isEmpty()) { + columnComments = new ArrayList<>(0); + } else { + columnComments = Arrays.asList( + columnCommentProperty.split(String.valueOf(ColumnType.COLUMN_COMMENTS_DELIMITER))); + } + LOG.debug("columns: {}, {}", columnNameProperty, columnNames); + LOG.debug("types: {}, {} ", columnTypeProperty, columnTypes); + LOG.debug("comments: {} ", columnCommentProperty); + return getFieldSchemaFromColumnInfo(columnNames, columnTypes, columnComments); + } + + private static List getFieldSchemaFromColumnInfo(List columnNames, + List columnTypes, List columnComments) { + int len = columnNames.size(); + List fieldSchemas = new ArrayList<>(len); + for (int i = 0; i < len; i++) { + FieldSchema fieldSchema = new FieldSchema(); + fieldSchema.setName(columnNames.get(i)); + //In case of complex types getTypeName() will recusively go into typeName + //of individual fields when the ColumnType was constructed + //in SchemaToTypeInfo.generateColumnTypes in the constructor + fieldSchema.setType(columnTypes.get(i).getTypeName()); + fieldSchema.setComment(StorageSchemaUtils.determineFieldComment(columnComments.get(i))); + } + return fieldSchemas; } } diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/StorageSchemaReader.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/StorageSchemaReader.java index 6251e23991b8f898f939f5848fc5cbf5e8ceb07c..aff9bc89e2d43a92561b6e24c4d9d158644964af 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/StorageSchemaReader.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/StorageSchemaReader.java @@ -31,8 +31,7 @@ * An interface to implement reading schemas from stored data. */ @InterfaceAudience.Public -@InterfaceStability.Evolving -interface StorageSchemaReader { +@InterfaceStability.Evolving public interface StorageSchemaReader { /** * Read the schema from the storage representation of the table. * @param tbl metastore table object diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/BaseMetastoreCharTypeInfo.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/BaseMetastoreCharTypeInfo.java new file mode 100644 index 0000000000000000000000000000000000000000..23883f4205a3bf97ab8cad235e25d10372cc7143 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/BaseMetastoreCharTypeInfo.java @@ -0,0 +1,41 @@ +package org.apache.hadoop.hive.metastore.type; + +public abstract class BaseMetastoreCharTypeInfo extends PrimitiveMetastoreTypeInfo { + private static final long serialVersionUID = 1L; + + private int length; + + // no-arg constructor to make kyro happy. 
+ public BaseMetastoreCharTypeInfo() { + } + + public BaseMetastoreCharTypeInfo(String typeName) { + super(typeName); + } + + public BaseMetastoreCharTypeInfo(String typeName, int length) { + super(typeName); + this.length = length; + } + + public int getLength() { + return length; + } + + public void setLength(int length) { + this.length = length; + } + + @Override + public String getQualifiedName() { + return getQualifiedName(typeName, length); + } + + public static String getQualifiedName(String typeName, int length) { + StringBuilder sb = new StringBuilder(typeName); + sb.append("("); + sb.append(length); + sb.append(")"); + return sb.toString(); + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/CharMetastoreTypeInfo.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/CharMetastoreTypeInfo.java new file mode 100644 index 0000000000000000000000000000000000000000..de976c3923daa435a1af00602502f431d15a29be --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/CharMetastoreTypeInfo.java @@ -0,0 +1,56 @@ +package org.apache.hadoop.hive.metastore.type; + +import org.apache.hadoop.hive.metastore.ColumnType; + +public class CharMetastoreTypeInfo extends BaseMetastoreCharTypeInfo { + private static final long serialVersionUID = 1L; + + // no-arg constructor to make kryo happy. + public CharMetastoreTypeInfo() { + super(ColumnType.CHAR_TYPE_NAME); + } + + public CharMetastoreTypeInfo(int length) { + super(ColumnType.CHAR_TYPE_NAME, length); + validateCharParameter(length); + } + + public static void validateCharParameter(int length) { + if (length > ColumnType.MAX_CHAR_LENGTH || length < 1) { + throw new RuntimeException("Char length " + length + " out of allowed range [1, " + + ColumnType.MAX_CHAR_LENGTH + "]"); + } + } + + @Override + public String getTypeName() { + return getQualifiedName(); + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + + CharMetastoreTypeInfo pti = (CharMetastoreTypeInfo) other; + + return this.typeName.equals(pti.typeName) && this.getLength() == pti.getLength(); + } + + /** + * Generate the hashCode for this TypeInfo. + */ + @Override + public int hashCode() { + return getQualifiedName().hashCode(); + } + + @Override + public String toString() { + return getQualifiedName(); + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/DecimalMetastoreTypeInfo.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/DecimalMetastoreTypeInfo.java new file mode 100644 index 0000000000000000000000000000000000000000..8927f7734a7ea7496b4a9a8f7031c065f38121aa --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/DecimalMetastoreTypeInfo.java @@ -0,0 +1,111 @@ +package org.apache.hadoop.hive.metastore.type; + +import org.apache.hadoop.hive.metastore.ColumnType; + +public class DecimalMetastoreTypeInfo extends PrimitiveMetastoreTypeInfo { + private static final long serialVersionUID = 1L; + + private int precision; + private int scale; + + // no-arg constructor for deserialization. 
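+ // (precision and scale are then filled in through the setters below)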
+ public DecimalMetastoreTypeInfo() { + super(ColumnType.DECIMAL_TYPE_NAME); + } + + public DecimalMetastoreTypeInfo(int precision, int scale) { + super(ColumnType.DECIMAL_TYPE_NAME); + validateParameter(precision, scale); + this.precision = precision; + this.scale = scale; + } + + public static void validateParameter(int precision, int scale) { + if (precision < 1 || precision > ColumnType.MAX_PRECISION) { + throw new IllegalArgumentException("Decimal precision out of allowed range [1," + + ColumnType.MAX_PRECISION + "]"); + } + + if (scale < 0 || scale > ColumnType.MAX_SCALE) { + throw new IllegalArgumentException("Decimal scale out of allowed range [0," + + ColumnType.MAX_SCALE + "]"); + } + + if (precision < scale) { + throw new IllegalArgumentException("Decimal scale must be less than or equal to precision"); + } + } + + @Override + public String getTypeName() { + return getQualifiedName(); + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + + DecimalMetastoreTypeInfo dti = (DecimalMetastoreTypeInfo)other; + + return this.precision() == dti.precision() && this.scale() == dti.scale(); + + } + + /** + * Generate the hashCode for this TypeInfo. + */ + @Override + public int hashCode() { + return 31 * (17 + precision) + scale; + } + + @Override + public String toString() { + return getQualifiedName(); + } + + @Override + public String getQualifiedName() { + return getQualifiedName(precision, scale); + } + + public static String getQualifiedName(int precision, int scale) { + StringBuilder sb = new StringBuilder(ColumnType.DECIMAL_TYPE_NAME); + sb.append("("); + sb.append(precision); + sb.append(","); + sb.append(scale); + sb.append(")"); + return sb.toString(); + } + + public int precision() { + return precision; + } + + public int scale() { + return scale; + } + + public int getPrecision() { + return precision; + } + + public void setPrecision(int precision) { + this.precision = precision; + } + + public int getScale() { + return scale; + } + + public void setScale(int scale) { + this.scale = scale; + } + +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/ListMetastoreTypeInfo.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/ListMetastoreTypeInfo.java new file mode 100644 index 0000000000000000000000000000000000000000..022f246d113ce374fc26dbf173b80709d18ccb45 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/ListMetastoreTypeInfo.java @@ -0,0 +1,60 @@ +package org.apache.hadoop.hive.metastore.type; + +import org.apache.hadoop.hive.metastore.ColumnType; + +public class ListMetastoreTypeInfo extends MetastoreTypeInfo { + private static final long serialVersionUID = 1L; + private MetastoreTypeInfo listElementTypeInfo; + + /** + * For java serialization use only. + */ + public ListMetastoreTypeInfo() { + } + + @Override + public String getTypeName() { + return ColumnType.LIST_TYPE_NAME + "<" + + listElementTypeInfo.getTypeName() + ">"; + } + + /** + * For java serialization use only. + */ + public void setListElementTypeInfo(MetastoreTypeInfo listElementTypeInfo) { + this.listElementTypeInfo = listElementTypeInfo; + } + + /** + * For TypeInfoFactory use only. 
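+ * Callers should obtain instances via MetastoreTypeInfoFactory.getListTypeInfo, which caches them.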
+ */ + ListMetastoreTypeInfo(MetastoreTypeInfo elementTypeInfo) { + listElementTypeInfo = elementTypeInfo; + } + + @Override + public Category getCategory() { + return Category.LIST; + } + + public MetastoreTypeInfo getListElementTypeInfo() { + return listElementTypeInfo; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (!(other instanceof ListMetastoreTypeInfo)) { + return false; + } + return getListElementTypeInfo().equals( + ((ListMetastoreTypeInfo) other).getListElementTypeInfo()); + } + + @Override + public int hashCode() { + return listElementTypeInfo.hashCode(); + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MapMetastoreTypeInfo.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MapMetastoreTypeInfo.java new file mode 100644 index 0000000000000000000000000000000000000000..bcc405d8a2f9f461fa85589d9e6fc5749cae8a0d --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MapMetastoreTypeInfo.java @@ -0,0 +1,74 @@ +package org.apache.hadoop.hive.metastore.type; + +import org.apache.hadoop.hive.metastore.ColumnType; + +public class MapMetastoreTypeInfo extends MetastoreTypeInfo { + private static final long serialVersionUID = 1L; + + private MetastoreTypeInfo mapKeyTypeInfo; + private MetastoreTypeInfo mapValueTypeInfo; + + /** + * For java serialization use only. + */ + public MapMetastoreTypeInfo() { + } + + @Override + public String getTypeName() { + return ColumnType.MAP_TYPE_NAME + "<" + + mapKeyTypeInfo.getTypeName() + "," + mapValueTypeInfo.getTypeName() + + ">"; + } + + /** + * For java serialization use only. + */ + public void setMapKeyTypeInfo(MetastoreTypeInfo mapKeyTypeInfo) { + this.mapKeyTypeInfo = mapKeyTypeInfo; + } + + /** + * For java serialization use only. 
+ */ + public void setMapValueTypeInfo(MetastoreTypeInfo mapValueTypeInfo) { + this.mapValueTypeInfo = mapValueTypeInfo; + } + + // For TypeInfoFactory use only + MapMetastoreTypeInfo(MetastoreTypeInfo keyTypeInfo, MetastoreTypeInfo valueTypeInfo) { + mapKeyTypeInfo = keyTypeInfo; + mapValueTypeInfo = valueTypeInfo; + } + + @Override + public Category getCategory() { + return Category.MAP; + } + + public MetastoreTypeInfo getMapKeyTypeInfo() { + return mapKeyTypeInfo; + } + + public MetastoreTypeInfo getMapValueTypeInfo() { + return mapValueTypeInfo; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (!(other instanceof MapMetastoreTypeInfo)) { + return false; + } + MapMetastoreTypeInfo o = (MapMetastoreTypeInfo) other; + return o.getMapKeyTypeInfo().equals(getMapKeyTypeInfo()) + && o.getMapValueTypeInfo().equals(getMapValueTypeInfo()); + } + + @Override + public int hashCode() { + return mapKeyTypeInfo.hashCode() ^ mapValueTypeInfo.hashCode(); + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MetastorePrimitiveTypeCategory.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MetastorePrimitiveTypeCategory.java new file mode 100644 index 0000000000000000000000000000000000000000..ba3ae52c66d847e98e80c670f6c4da87470ec043 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MetastorePrimitiveTypeCategory.java @@ -0,0 +1,67 @@ +package org.apache.hadoop.hive.metastore.type; + +import static org.apache.hadoop.hive.metastore.ColumnType.BIGINT_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.BINARY_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.BOOLEAN_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.CHAR_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.DATETIME_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.DATE_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.DECIMAL_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.DOUBLE_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.FLOAT_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.INTERVAL_DAY_TIME_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.INTERVAL_YEAR_MONTH_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.INT_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.SMALLINT_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.STRING_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.TIMESTAMPLOCALTZ_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.TIMESTAMPTZ_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.TIMESTAMP_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.TINYINT_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.VARCHAR_TYPE_NAME; +import static org.apache.hadoop.hive.metastore.ColumnType.VOID_TYPE_NAME; + +public enum MetastorePrimitiveTypeCategory { + //this mapping is derived from PrimitiveObjectInspectorUtils in Hive + //It maps the primitive category to the string representation of the type + //Note that tinyint maps to byte, smallint maps to short and bigint maps to long below + VOID(VOID_TYPE_NAME), + BOOLEAN(BOOLEAN_TYPE_NAME), + BYTE(TINYINT_TYPE_NAME), + SHORT(SMALLINT_TYPE_NAME), + INT(INT_TYPE_NAME), 
+ LONG(BIGINT_TYPE_NAME), + FLOAT(FLOAT_TYPE_NAME), + DOUBLE(DOUBLE_TYPE_NAME), + STRING(STRING_TYPE_NAME), + VARCHAR(VARCHAR_TYPE_NAME), + CHAR(CHAR_TYPE_NAME), + DATE(DATE_TYPE_NAME), + DATETIME(DATETIME_TYPE_NAME), + TIMESTAMP(TIMESTAMP_TYPE_NAME), + INTERVAL_YEAR_MONTH(INTERVAL_YEAR_MONTH_TYPE_NAME), + INTERVAL_DAY_TIME(INTERVAL_DAY_TIME_TYPE_NAME), + DECIMAL(DECIMAL_TYPE_NAME), + BINARY(BINARY_TYPE_NAME), + TIMESTAMPTZ(TIMESTAMPTZ_TYPE_NAME), + TIMESTAMPLOCALTZ(TIMESTAMPLOCALTZ_TYPE_NAME); + + private final String typeName; + MetastorePrimitiveTypeCategory(String typeName) { + this.typeName = typeName; + } + + public String getTypeName() { + return typeName; + } + + public static MetastorePrimitiveTypeCategory from(String typeName) { + for (MetastorePrimitiveTypeCategory primitiveTypeCategory : MetastorePrimitiveTypeCategory + .values()) { + if (primitiveTypeCategory.getTypeName().equals(typeName)) { + return primitiveTypeCategory; + } + } + return null; + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MetastoreTypeInfo.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MetastoreTypeInfo.java new file mode 100644 index 0000000000000000000000000000000000000000..dceb4fda59b5c53279157262fb25cc7747c1ceaf --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MetastoreTypeInfo.java @@ -0,0 +1,26 @@ +package org.apache.hadoop.hive.metastore.type; + +import java.io.Serializable; + +public abstract class MetastoreTypeInfo implements Serializable { + /** + * Category. + * + */ + public enum Category { + PRIMITIVE, LIST, MAP, STRUCT, UNION + } + + public abstract Category getCategory(); + public abstract String getTypeName(); + public abstract boolean equals(Object o); + public abstract int hashCode(); + /** + * String representing the qualified type name. + * Qualified types should override this method. + * @return the qualified type name + */ + public String getQualifiedName() { + return getTypeName(); + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MetastoreTypeInfoFactory.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MetastoreTypeInfoFactory.java new file mode 100644 index 0000000000000000000000000000000000000000..71990271c192e9ad6927b931342e647a66ce4328 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MetastoreTypeInfoFactory.java @@ -0,0 +1,257 @@ +package org.apache.hadoop.hive.metastore.type; + +import org.apache.hadoop.hive.metastore.ColumnType; + +import java.time.ZoneId; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ConcurrentHashMap; + +public class MetastoreTypeInfoFactory { + // Map from type name (such as int or varchar(40)) to the corresponding PrimitiveTypeInfo + // instance. 
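+ // A typical lookup (illustrative): getPrimitiveTypeInfo("varchar(40)") parses the length,
+ // caches the resulting VarcharMetastoreTypeInfo under "varchar(40)" and returns the cached
+ // instance on subsequent calls.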
+ private static ConcurrentHashMap<String, MetastoreTypeInfo> cachedPrimitiveTypeInfo = + new ConcurrentHashMap<>(); + + //non-parameterized primitive types + private static final MetastoreTypeInfo voidTypeInfo = + new PrimitiveMetastoreTypeInfo(ColumnType.VOID_TYPE_NAME); + private static final MetastoreTypeInfo booleanTypeInfo = + new PrimitiveMetastoreTypeInfo(ColumnType.BOOLEAN_TYPE_NAME); + private static final MetastoreTypeInfo intTypeInfo = + new PrimitiveMetastoreTypeInfo(ColumnType.INT_TYPE_NAME); + private static final MetastoreTypeInfo longTypeInfo = + new PrimitiveMetastoreTypeInfo(ColumnType.BIGINT_TYPE_NAME); + private static final MetastoreTypeInfo stringTypeInfo = + new PrimitiveMetastoreTypeInfo(ColumnType.STRING_TYPE_NAME); + private static final MetastoreTypeInfo floatTypeInfo = + new PrimitiveMetastoreTypeInfo(ColumnType.FLOAT_TYPE_NAME); + private static final MetastoreTypeInfo doubleTypeInfo = + new PrimitiveMetastoreTypeInfo(ColumnType.DOUBLE_TYPE_NAME); + private static final MetastoreTypeInfo byteTypeInfo = + new PrimitiveMetastoreTypeInfo(ColumnType.TINYINT_TYPE_NAME); + private static final MetastoreTypeInfo shortTypeInfo = + new PrimitiveMetastoreTypeInfo(ColumnType.SMALLINT_TYPE_NAME); + + //date and time primitives + private static final MetastoreTypeInfo dateTypeInfo = + new PrimitiveMetastoreTypeInfo(ColumnType.DATE_TYPE_NAME); + private static final MetastoreTypeInfo timestampTypeInfo = + new PrimitiveMetastoreTypeInfo(ColumnType.TIMESTAMP_TYPE_NAME); + /** + * A TimestampTZTypeInfo with system default time zone. + */ + private static final MetastoreTypeInfo timestampLocalTZTypeInfo = + new TimestampLocalTZMetastoreTypeInfo(ZoneId.systemDefault().getId()); + + private static final MetastoreTypeInfo intervalYearMonthTypeInfo = + new PrimitiveMetastoreTypeInfo(ColumnType.INTERVAL_YEAR_MONTH_TYPE_NAME); + private static final MetastoreTypeInfo intervalDayTimeTypeInfo = + new PrimitiveMetastoreTypeInfo(ColumnType.INTERVAL_DAY_TIME_TYPE_NAME); + private static final MetastoreTypeInfo binaryTypeInfo = + new PrimitiveMetastoreTypeInfo(ColumnType.BINARY_TYPE_NAME); + + /** + * A DecimalTypeInfo instance with the system default precision and scale. 
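+ * It is registered in the cache under its qualified name, i.e. "decimal(38,18)".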
+ */ + //parameterized type infos + private static final MetastoreTypeInfo decimalTypeInfo = + new DecimalMetastoreTypeInfo(ColumnType.SYSTEM_DEFAULT_PRECISION, + ColumnType.SYSTEM_DEFAULT_SCALE); + + private static final MetastoreTypeInfo charTypeInfo = + new CharMetastoreTypeInfo(ColumnType.MAX_CHAR_LENGTH); + + private static final MetastoreTypeInfo varcharTypeInfo = + new VarcharMetastoreTypeInfo(ColumnType.MAX_VARCHAR_LENGTH); + + static { + cachedPrimitiveTypeInfo.put(ColumnType.VOID_TYPE_NAME, voidTypeInfo); + cachedPrimitiveTypeInfo.put(ColumnType.BOOLEAN_TYPE_NAME, booleanTypeInfo); + cachedPrimitiveTypeInfo.put(ColumnType.INT_TYPE_NAME, intTypeInfo); + cachedPrimitiveTypeInfo.put(ColumnType.BIGINT_TYPE_NAME, longTypeInfo); + cachedPrimitiveTypeInfo.put(ColumnType.STRING_TYPE_NAME, stringTypeInfo); + cachedPrimitiveTypeInfo.put(charTypeInfo.getQualifiedName(), charTypeInfo); + cachedPrimitiveTypeInfo.put(varcharTypeInfo.getQualifiedName(), varcharTypeInfo); + cachedPrimitiveTypeInfo.put(ColumnType.FLOAT_TYPE_NAME, floatTypeInfo); + cachedPrimitiveTypeInfo.put(ColumnType.DOUBLE_TYPE_NAME, doubleTypeInfo); + cachedPrimitiveTypeInfo.put(ColumnType.TINYINT_TYPE_NAME, byteTypeInfo); + cachedPrimitiveTypeInfo.put(ColumnType.SMALLINT_TYPE_NAME, shortTypeInfo); + cachedPrimitiveTypeInfo.put(ColumnType.DATE_TYPE_NAME, dateTypeInfo); + cachedPrimitiveTypeInfo.put(ColumnType.TIMESTAMP_TYPE_NAME, timestampTypeInfo); + cachedPrimitiveTypeInfo.put(ColumnType.TIMESTAMPLOCALTZ_TYPE_NAME, timestampLocalTZTypeInfo); + cachedPrimitiveTypeInfo.put(ColumnType.INTERVAL_YEAR_MONTH_TYPE_NAME, intervalYearMonthTypeInfo); + cachedPrimitiveTypeInfo.put(ColumnType.INTERVAL_DAY_TIME_TYPE_NAME, intervalDayTimeTypeInfo); + cachedPrimitiveTypeInfo.put(ColumnType.BINARY_TYPE_NAME, binaryTypeInfo); + cachedPrimitiveTypeInfo.put(decimalTypeInfo.getQualifiedName(), decimalTypeInfo); + } + + /** + * Get PrimitiveTypeInfo instance for the given type name of a type + * including types with parameters, such as varchar(20). + * + * @param typeName type name possibly with parameters. + * @return a PrimitiveTypeInfo instance + */ + public static MetastoreTypeInfo getPrimitiveTypeInfo(String typeName) { + MetastoreTypeInfo result = cachedPrimitiveTypeInfo.get(typeName); + if (result != null) { + return result; + } + + // Not found in the cache. Must be a parameterized type. Create it. + result = createPrimitiveMetastoreTypeInfo(typeName); + if (result == null) { + throw new RuntimeException("Error creating PrimitiveTypeInfo instance for " + typeName); + } + + MetastoreTypeInfo prev = cachedPrimitiveTypeInfo.putIfAbsent(typeName, result); + if (prev != null) { + result = prev; + } + return result; + } + + private static String getBaseName(String typeName) { + int idx = typeName.indexOf('('); + if (idx == -1) { + return typeName; + } else { + return typeName.substring(0, idx); + } + } + /** + * Create PrimitiveTypeInfo instance for the given full name of the type. The returned + * type is one of the parameterized type infos, such as VarcharTypeInfo. 
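+ * For example, "varchar(100)" yields a VarcharMetastoreTypeInfo of length 100, and
+ * "decimal(10,2)" yields a DecimalMetastoreTypeInfo with precision 10 and scale 2.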
+ * + * @param fullName Fully qualified name of the type + * @return PrimitiveTypeInfo instance + */ + private static PrimitiveMetastoreTypeInfo createPrimitiveMetastoreTypeInfo(String fullName) { + String baseName = getBaseName(fullName); + if (!ColumnType.AllTypes.contains(baseName)) { + throw new RuntimeException("Unknown type " + fullName); + } + + MetastoreTypeInfoParser.PrimitiveParts parts = + new MetastoreTypeInfoParser(fullName).parsePrimitiveParts(); + if (parts.typeParams == null || parts.typeParams.length < 1) { + return null; + } + + switch (MetastorePrimitiveTypeCategory.from(baseName)) { + case CHAR: + if (parts.typeParams.length != 1) { + return null; + } + return new CharMetastoreTypeInfo(Integer.valueOf(parts.typeParams[0])); + case VARCHAR: + if (parts.typeParams.length != 1) { + return null; + } + return new VarcharMetastoreTypeInfo(Integer.valueOf(parts.typeParams[0])); + case DECIMAL: + if (parts.typeParams.length != 2) { + return null; + } + return new DecimalMetastoreTypeInfo(Integer.valueOf(parts.typeParams[0]), + Integer.valueOf(parts.typeParams[1])); + case TIMESTAMPLOCALTZ: + if (parts.typeParams.length != 1) { + return null; + } + return new TimestampLocalTZMetastoreTypeInfo(parts.typeParams[0]); + default: + return null; + } + } + + static ConcurrentHashMap<ArrayList<List<?>>, MetastoreTypeInfo> cachedStructTypeInfo = + new ConcurrentHashMap<>(); + + public static MetastoreTypeInfo getStructTypeInfo(List<String> names, + List<MetastoreTypeInfo> typeInfos) { + ArrayList<List<?>> signature = new ArrayList<>(2); + signature.add(names); + signature.add(typeInfos); + MetastoreTypeInfo result = cachedStructTypeInfo.get(signature); + if (result == null) { + result = new StructMetastoreTypeInfo(names, typeInfos); + MetastoreTypeInfo prev = cachedStructTypeInfo.putIfAbsent(signature, result); + if (prev != null) { + result = prev; + } + } + return result; + } + + static ConcurrentHashMap<List<?>, MetastoreTypeInfo> cachedUnionTypeInfo = + new ConcurrentHashMap<>(); + + public static MetastoreTypeInfo getUnionTypeInfo(List<MetastoreTypeInfo> typeInfos) { + MetastoreTypeInfo result = cachedUnionTypeInfo.get(typeInfos); + if (result == null) { + result = new UnionMetastoreTypeInfo(typeInfos); + MetastoreTypeInfo prev = cachedUnionTypeInfo.putIfAbsent(typeInfos, result); + if (prev != null) { + result = prev; + } + } + return result; + } + + static ConcurrentHashMap<MetastoreTypeInfo, MetastoreTypeInfo> cachedListTypeInfo = + new ConcurrentHashMap<>(); + + public static MetastoreTypeInfo getListTypeInfo(MetastoreTypeInfo elementTypeInfo) { + MetastoreTypeInfo result = cachedListTypeInfo.get(elementTypeInfo); + if (result == null) { + result = new ListMetastoreTypeInfo(elementTypeInfo); + MetastoreTypeInfo prev = cachedListTypeInfo.putIfAbsent(elementTypeInfo, result); + if (prev != null) { + result = prev; + } + } + return result; + } + + static ConcurrentHashMap<ArrayList<MetastoreTypeInfo>, MetastoreTypeInfo> cachedMapTypeInfo = + new ConcurrentHashMap<>(); + + public static MetastoreTypeInfo getMapTypeInfo(MetastoreTypeInfo keyTypeInfo, + MetastoreTypeInfo valueTypeInfo) { + ArrayList<MetastoreTypeInfo> signature = new ArrayList<>(2); + signature.add(keyTypeInfo); + signature.add(valueTypeInfo); + MetastoreTypeInfo result = cachedMapTypeInfo.get(signature); + if (result == null) { + result = new MapMetastoreTypeInfo(keyTypeInfo, valueTypeInfo); + MetastoreTypeInfo prev = cachedMapTypeInfo.putIfAbsent(signature, result); + if (prev != null) { + result = prev; + } + } + return result; + } + + public static CharMetastoreTypeInfo getCharTypeInfo(int length) { + String fullName = 
CharMetastoreTypeInfo.getQualifiedName(ColumnType.CHAR_TYPE_NAME, length); + return (CharMetastoreTypeInfo) getPrimitiveTypeInfo(fullName); + } + + public static VarcharMetastoreTypeInfo getVarcharTypeInfo(int length) { + String fullName = + VarcharMetastoreTypeInfo.getQualifiedName(ColumnType.VARCHAR_TYPE_NAME, length); + return (VarcharMetastoreTypeInfo) getPrimitiveTypeInfo(fullName); + } + + public static DecimalMetastoreTypeInfo getDecimalTypeInfo(int precision, int scale) { + String fullName = DecimalMetastoreTypeInfo.getQualifiedName(precision, scale); + return (DecimalMetastoreTypeInfo) getPrimitiveTypeInfo(fullName); + } + + public static TimestampLocalTZMetastoreTypeInfo getTimestampTZTypeInfo(ZoneId defaultTimeZone) { + String fullName = TimestampLocalTZMetastoreTypeInfo.getQualifiedName(defaultTimeZone); + return (TimestampLocalTZMetastoreTypeInfo) getPrimitiveTypeInfo(fullName); + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MetastoreTypeInfoParser.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MetastoreTypeInfoParser.java new file mode 100644 index 0000000000000000000000000000000000000000..85f1ab6833dda5a2aa695e9d35b5235c116431fd --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/MetastoreTypeInfoParser.java @@ -0,0 +1,316 @@ +package org.apache.hadoop.hive.metastore.type; + +import org.apache.hadoop.hive.metastore.ColumnType; + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +public class MetastoreTypeInfoParser { + + private final String typeInfoString; + private final ArrayList<Token> typeInfoTokens; + private ArrayList<MetastoreTypeInfo> typeInfos; + private int iToken; + + public MetastoreTypeInfoParser(String columnTypeProperty) { + this.typeInfoString = columnTypeProperty; + typeInfoTokens = tokenize(columnTypeProperty); + } + + public List<MetastoreTypeInfo> parseTypeInfos() { + typeInfos = new ArrayList<>(); + iToken = 0; + while (iToken < typeInfoTokens.size()) { + typeInfos.add(parseType()); + if (iToken < typeInfoTokens.size()) { + Token separator = typeInfoTokens.get(iToken); + if (",".equals(separator.text) || ";".equals(separator.text) + || ":".equals(separator.text)) { + iToken++; + } else { + throw new IllegalArgumentException( + "Error: ',', ':', or ';' expected at position " + + separator.position + " from '" + typeInfoString + "' " + + typeInfoTokens); + } + } + } + return typeInfos; + } + + + private static class Token { + public int position; + public String text; + public boolean isType; + + @Override + public String toString() { + return "" + position + ":" + text; + } + } + + private static boolean isTypeChar(char c) { + return Character.isLetterOrDigit(c) || c == '_' || c == '.' || c == ' ' || c == '$'; + } + + /** + * Tokenize the typeInfoString. The rule is simple: all consecutive + * alphanumeric characters and '_', '.' are in one token, and all other characters are + * one character per token. + * + * tokenize("map<int,string>") should return + * ["map","<","int",",","string",">"] + * + * Note that we add '$' in new Calcite return path. As '$' will not appear + * in any type in Hive, it is safe to do so. + */ + private static ArrayList<Token> tokenize(String typeInfoString) { + ArrayList<Token> tokens = new ArrayList<>(0); + int begin = 0; + int end = 1; + while (end <= typeInfoString.length()) { + // last character ends a token? 
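+ // e.g. "struct<a:int>" tokenizes to ["struct", "<", "a", ":", "int", ">"]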
+ // if there are quotes, all the text between the quotes + // is considered a single token (this can happen for + // timestamp with local time-zone) + if (begin > 0 && + typeInfoString.charAt(begin - 1) == '(' && + typeInfoString.charAt(begin) == '\'') { + // Ignore starting quote + begin++; + do { + end++; + } while (typeInfoString.charAt(end) != '\''); + } else if (typeInfoString.charAt(begin) == '\'' && + typeInfoString.charAt(begin + 1) == ')') { + // Ignore closing quote + begin++; + end++; + } + if (end == typeInfoString.length() + || !isTypeChar(typeInfoString.charAt(end - 1)) + || !isTypeChar(typeInfoString.charAt(end))) { + Token t = new Token(); + t.position = begin; + t.text = typeInfoString.substring(begin, end); + t.isType = isTypeChar(typeInfoString.charAt(begin)); + tokens.add(t); + begin = end; + } + end++; + } + return tokens; + } + + private Token peek() { + if (iToken < typeInfoTokens.size()) { + return typeInfoTokens.get(iToken); + } else { + return null; + } + } + + private Token expect(String item) { + return expect(item, null); + } + + private Token expect(String item, String alternative) { + if (iToken >= typeInfoTokens.size()) { + throw new IllegalArgumentException("Error: " + item + + " expected at the end of '" + typeInfoString + "'"); + } + Token t = typeInfoTokens.get(iToken); + if (item.equals("type")) { + if (!ColumnType.LIST_TYPE_NAME.equals(t.text) + && !ColumnType.MAP_TYPE_NAME.equals(t.text) + && !ColumnType.STRUCT_TYPE_NAME.equals(t.text) + && !ColumnType.UNION_TYPE_NAME.equals(t.text) + //TODO:HIVE-17580 do we need to support "unknown" type in metastore? + && null == MetastorePrimitiveTypeCategory.from(t.text) + && !t.text.equals(alternative)) { + throw new IllegalArgumentException("Error: " + item + + " expected at the position " + t.position + " of '" + + typeInfoString + "' but '" + t.text + "' is found."); + } + } else if (item.equals("name")) { + if (!t.isType && !t.text.equals(alternative)) { + throw new IllegalArgumentException("Error: " + item + + " expected at the position " + t.position + " of '" + + typeInfoString + "' but '" + t.text + "' is found."); + } + } else { + if (!item.equals(t.text) && !t.text.equals(alternative)) { + throw new IllegalArgumentException("Error: " + item + + " expected at the position " + t.position + " of '" + + typeInfoString + "' but '" + t.text + "' is found."); + } + } + iToken++; + return t; + } + + private String[] parseParams() { + List<String> params = new LinkedList<>(); + + Token t = peek(); + if (t != null && t.text.equals("(")) { + expect("("); + + // checking for null in the for-loop condition prevents null-ptr exception + // and allows us to fail more gracefully with a parsing error. + for (t = peek(); (t == null) || !t.text.equals(")"); t = expect(",", ")")) { + params.add(expect("name").text); + } + if (params.size() == 0) { + throw new IllegalArgumentException( + "type parameters expected for type string " + typeInfoString); + } + } + + return params.toArray(new String[params.size()]); + } + + private MetastoreTypeInfo parseType() { + + Token t = expect("type"); + + // Is this a primitive type? 
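+ // primitive types may carry parameters, e.g. char(10), varchar(255) or decimal(10,2)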
+ if (ColumnType.PrimitiveTypes.contains(t.text)) { + MetastorePrimitiveTypeCategory primitiveTypeCategory = + MetastorePrimitiveTypeCategory.from(t.text); + String[] params = parseParams(); + switch (primitiveTypeCategory) { + case CHAR: + case VARCHAR: + if (params == null || params.length == 0) { + throw new IllegalArgumentException(t.text + + " type is specified without length: " + typeInfoString); + } + + int length = 1; + if (params.length == 1) { + length = Integer.parseInt(params[0]); + if (primitiveTypeCategory == MetastorePrimitiveTypeCategory.VARCHAR) { + VarcharMetastoreTypeInfo.validateVarcharParameter(length); + return MetastoreTypeInfoFactory.getVarcharTypeInfo(length); + } else { + CharMetastoreTypeInfo.validateCharParameter(length); + return MetastoreTypeInfoFactory.getCharTypeInfo(length); + } + } else if (params.length > 1) { + throw new IllegalArgumentException( + "Type " + t.text + " only takes one parameter, but " + + params.length + " were seen"); + } + + case DECIMAL: + int precision = ColumnType.USER_DEFAULT_PRECISION; + int scale = ColumnType.USER_DEFAULT_SCALE; + if (params == null || params.length == 0) { + // It's possible that old metadata still refers to "decimal" as a column type w/o + // precision/scale. In this case, the default (10,0) is assumed. Thus, do nothing here. + } else if (params.length == 1) { + // only precision is specified + precision = Integer.valueOf(params[0]); + DecimalMetastoreTypeInfo.validateParameter(precision, scale); + } else if (params.length == 2) { + // New metadata always has two parameters. + precision = Integer.parseInt(params[0]); + scale = Integer.parseInt(params[1]); + DecimalMetastoreTypeInfo.validateParameter(precision, scale); + } else if (params.length > 2) { + throw new IllegalArgumentException("Type decimal only takes two parameters, but " + + params.length + " were seen"); + } + return MetastoreTypeInfoFactory.getDecimalTypeInfo(precision, scale); + + default: + return MetastoreTypeInfoFactory.getPrimitiveTypeInfo(t.text); + } + } + + // Is this a list type? + if (ColumnType.LIST_TYPE_NAME.equals(t.text)) { + expect("<"); + MetastoreTypeInfo listElementType = parseType(); + expect(">"); + return MetastoreTypeInfoFactory.getListTypeInfo(listElementType); + } + + // Is this a map type? + if (ColumnType.MAP_TYPE_NAME.equals(t.text)) { + expect("<"); + MetastoreTypeInfo mapKeyType = parseType(); + expect(","); + MetastoreTypeInfo mapValueType = parseType(); + expect(">"); + return MetastoreTypeInfoFactory.getMapTypeInfo(mapKeyType, mapValueType); + } + + // Is this a struct type? + if (ColumnType.STRUCT_TYPE_NAME.equals(t.text)) { + ArrayList<String> fieldNames = new ArrayList<>(); + ArrayList<MetastoreTypeInfo> fieldTypeInfos = new ArrayList<>(); + boolean first = true; + do { + if (first) { + expect("<"); + first = false; + } else { + Token separator = expect(">", ","); + if (separator.text.equals(">")) { + // end of struct + break; + } + } + Token name = expect("name", ">"); + if (name.text.equals(">")) { + break; + } + fieldNames.add(name.text); + expect(":"); + fieldTypeInfos.add(parseType()); + } while (true); + + return MetastoreTypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos); + } + // Is this a union type? 
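+ // e.g. uniontype<int,string> produces a UnionMetastoreTypeInfo with two members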
+ if (ColumnType.UNION_TYPE_NAME.equals(t.text)) { + List<MetastoreTypeInfo> objectTypeInfos = new ArrayList<>(); + boolean first = true; + do { + if (first) { + expect("<"); + first = false; + } else { + Token separator = expect(">", ","); + if (separator.text.equals(">")) { + // end of union + break; + } + } + objectTypeInfos.add(parseType()); + } while (true); + + return MetastoreTypeInfoFactory.getUnionTypeInfo(objectTypeInfos); + } + + throw new RuntimeException("Internal error parsing position " + + t.position + " of '" + typeInfoString + "'"); + } + + public PrimitiveParts parsePrimitiveParts() { + PrimitiveParts parts = new PrimitiveParts(); + Token t = expect("type"); + parts.typeName = t.text; + parts.typeParams = parseParams(); + return parts; + } + + public static class PrimitiveParts { + public String typeName; + public String[] typeParams; + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/PrimitiveMetastoreTypeInfo.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/PrimitiveMetastoreTypeInfo.java new file mode 100644 index 0000000000000000000000000000000000000000..e246634be34326ddb6d2e01b501334467c861c95 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/PrimitiveMetastoreTypeInfo.java @@ -0,0 +1,63 @@ +package org.apache.hadoop.hive.metastore.type; + +import java.util.Objects; + +public class PrimitiveMetastoreTypeInfo extends MetastoreTypeInfo { + private static final long serialVersionUID = 1L; + + // Base name (varchar vs fully qualified name such as varchar(200)). + protected String typeName; + private MetastorePrimitiveTypeCategory primitiveTypeCategory; + + /** + * For java serialization use only. + */ + public PrimitiveMetastoreTypeInfo() { + } + + /** + * For MetastoreTypeInfoFactory use only. 
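+ * Instances are cached by the factory and may be shared, so they should be treated as immutable.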
+ */ + PrimitiveMetastoreTypeInfo(String typeName) { + this.typeName = typeName; + primitiveTypeCategory = MetastorePrimitiveTypeCategory.from(typeName); + } + + @Override + public Category getCategory() { + return Category.PRIMITIVE; + } + + @Override + public String getTypeName() { + return typeName; + } + + @Override + public String toString() { + return typeName; + } + + @Override + public boolean equals(Object o) { + if (this == o) + return true; + if (o == null || getClass() != o.getClass()) + return false; + PrimitiveMetastoreTypeInfo that = (PrimitiveMetastoreTypeInfo) o; + return Objects.equals(typeName, that.typeName); + } + + @Override + public int hashCode() { + return Objects.hash(typeName); + } + + public MetastorePrimitiveTypeCategory getPrimitiveCategory() { + if (primitiveTypeCategory == null) { + primitiveTypeCategory = MetastorePrimitiveTypeCategory.from(typeName); + } + return primitiveTypeCategory; + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/StructMetastoreTypeInfo.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/StructMetastoreTypeInfo.java new file mode 100644 index 0000000000000000000000000000000000000000..6c477ddda2e629c1829abfe8aa62d85ce89ff1f3 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/StructMetastoreTypeInfo.java @@ -0,0 +1,117 @@ +package org.apache.hadoop.hive.metastore.type; + +import org.apache.hadoop.hive.metastore.ColumnType; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +public class StructMetastoreTypeInfo extends MetastoreTypeInfo { + private static final long serialVersionUID = 1L; + + private ArrayList<String> allStructFieldNames; + private ArrayList<MetastoreTypeInfo> allStructFieldTypeInfos; + + /** + * For java serialization use only. + */ + public StructMetastoreTypeInfo() { + } + + @Override + public String getTypeName() { + StringBuilder sb = new StringBuilder(); + sb.append(ColumnType.STRUCT_TYPE_NAME + "<"); + for (int i = 0; i < allStructFieldNames.size(); i++) { + if (i > 0) { + sb.append(","); + } + sb.append(allStructFieldNames.get(i)); + sb.append(":"); + sb.append(allStructFieldTypeInfos.get(i).getTypeName()); + } + sb.append(">"); + return sb.toString(); + } + + /** + * For java serialization use only. + */ + public void setAllStructFieldNames(ArrayList<String> allStructFieldNames) { + this.allStructFieldNames = allStructFieldNames; + } + + /** + * For java serialization use only. + */ + public void setAllStructFieldTypeInfos( + ArrayList<MetastoreTypeInfo> allStructFieldTypeInfos) { + this.allStructFieldTypeInfos = allStructFieldTypeInfos; + } + + /** + * For TypeInfoFactory use only. 
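+ * The names and typeInfos lists are copied defensively and must have the same length,
+ * with a positional correspondence between names and types.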
+ */ + StructMetastoreTypeInfo(List<String> names, List<MetastoreTypeInfo> typeInfos) { + allStructFieldNames = new ArrayList<>(names); + allStructFieldTypeInfos = new ArrayList<>(typeInfos); + } + + @Override + public Category getCategory() { + return Category.STRUCT; + } + + public ArrayList<String> getAllStructFieldNames() { + return allStructFieldNames; + } + + public ArrayList<MetastoreTypeInfo> getAllStructFieldTypeInfos() { + return allStructFieldTypeInfos; + } + + public MetastoreTypeInfo getStructFieldTypeInfo(String field) { + String fieldLowerCase = field.toLowerCase(); + for (int i = 0; i < allStructFieldNames.size(); i++) { + if (fieldLowerCase.equalsIgnoreCase(allStructFieldNames.get(i))) { + return allStructFieldTypeInfos.get(i); + } + } + throw new RuntimeException("cannot find field " + field + + "(lowercase form: " + fieldLowerCase + ") in " + allStructFieldNames); + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (!(other instanceof StructMetastoreTypeInfo)) { + return false; + } + StructMetastoreTypeInfo o = (StructMetastoreTypeInfo) other; + Iterator<String> namesIterator = getAllStructFieldNames().iterator(); + Iterator<String> otherNamesIterator = o.getAllStructFieldNames().iterator(); + + // Compare the field names using ignore-case semantics + while (namesIterator.hasNext() && otherNamesIterator.hasNext()) { + if (!namesIterator.next().equalsIgnoreCase(otherNamesIterator.next())) { + return false; + } + } + + // Different number of field names + if (namesIterator.hasNext() || otherNamesIterator.hasNext()) { + return false; + } + + // Compare the field types + return o.getAllStructFieldTypeInfos().equals(getAllStructFieldTypeInfos()); + } + + @Override + public int hashCode() { + return allStructFieldNames.hashCode() ^ allStructFieldTypeInfos.hashCode(); + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/TimestampLocalTZMetastoreTypeInfo.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/TimestampLocalTZMetastoreTypeInfo.java new file mode 100644 index 0000000000000000000000000000000000000000..44cfac78b13d6245e8b07c03566828de80423f10 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/TimestampLocalTZMetastoreTypeInfo.java @@ -0,0 +1,93 @@ +package org.apache.hadoop.hive.metastore.type; + +import org.apache.hadoop.hive.metastore.ColumnType; + +import java.time.DateTimeException; +import java.time.ZoneId; +import java.util.Objects; + +public class TimestampLocalTZMetastoreTypeInfo extends PrimitiveMetastoreTypeInfo { + private static final long serialVersionUID = 1L; + + private ZoneId timeZone; + + public TimestampLocalTZMetastoreTypeInfo() { + super(ColumnType.TIMESTAMPLOCALTZ_TYPE_NAME); + } + + public TimestampLocalTZMetastoreTypeInfo(String timeZoneStr) { + super(ColumnType.TIMESTAMPLOCALTZ_TYPE_NAME); + this.timeZone = parseTimeZone(timeZoneStr); + } + + @Override + public String getTypeName() { + return ColumnType.TIMESTAMPLOCALTZ_TYPE_NAME; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + + TimestampLocalTZMetastoreTypeInfo dti = (TimestampLocalTZMetastoreTypeInfo) other; + + return this.timeZone().equals(dti.timeZone()); + } + + /** + * Generate the hashCode for this TypeInfo. 
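+ * The time zone contributes to the hash, consistent with equals(), which compares zones.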
+ */ + @Override + public int hashCode() { + return Objects.hash(typeName, timeZone); + } + + @Override + public String toString() { + return getQualifiedName(); + } + + @Override + public String getQualifiedName() { + return getQualifiedName(timeZone); + } + + public static String getQualifiedName(ZoneId timeZone) { + StringBuilder sb = new StringBuilder(ColumnType.TIMESTAMPLOCALTZ_TYPE_NAME); + sb.append("('"); + sb.append(timeZone); + sb.append("')"); + return sb.toString(); + } + + public ZoneId timeZone() { + return timeZone; + } + + public ZoneId getTimeZone() { + return timeZone; + } + + public void setTimeZone(ZoneId timeZone) { + this.timeZone = timeZone; + } + + public static ZoneId parseTimeZone(String timeZoneStr) { + if (timeZoneStr == null || timeZoneStr.trim().isEmpty() || + timeZoneStr.trim().toLowerCase().equals("local")) { + // default + return ZoneId.systemDefault(); + } + try { + return ZoneId.of(timeZoneStr); + } catch (DateTimeException e1) { + throw new RuntimeException("Invalid time zone displacement value: " + timeZoneStr, e1); + } + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/UnionMetastoreTypeInfo.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/UnionMetastoreTypeInfo.java new file mode 100644 index 0000000000000000000000000000000000000000..f86193d283577a7e43dae256e053a7db8dea36b9 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/UnionMetastoreTypeInfo.java @@ -0,0 +1,76 @@ +package org.apache.hadoop.hive.metastore.type; + +import org.apache.hadoop.hive.metastore.ColumnType; + +import java.util.ArrayList; +import java.util.List; + +public class UnionMetastoreTypeInfo extends MetastoreTypeInfo { + private static final long serialVersionUID = 1L; + + private List<MetastoreTypeInfo> allUnionObjectTypeInfos; + + /** + * For java serialization use only. + */ + public UnionMetastoreTypeInfo() { + } + + @Override + public String getTypeName() { + StringBuilder sb = new StringBuilder(); + sb.append(ColumnType.UNION_TYPE_NAME + "<"); + for (int i = 0; i < allUnionObjectTypeInfos.size(); i++) { + if (i > 0) { + sb.append(","); + } + sb.append(allUnionObjectTypeInfos.get(i).getTypeName()); + } + sb.append(">"); + return sb.toString(); + } + + /** + * For java serialization use only. + */ + public void setAllUnionObjectTypeInfos( + List<MetastoreTypeInfo> allUnionObjectTypeInfos) { + this.allUnionObjectTypeInfos = allUnionObjectTypeInfos; + } + + /** + * For TypeInfoFactory use only. 
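+ * Member order is significant: equals() compares the member type lists positionally.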
+   */
+  UnionMetastoreTypeInfo(List<MetastoreTypeInfo> typeInfos) {
+    allUnionObjectTypeInfos = new ArrayList<>();
+    allUnionObjectTypeInfos.addAll(typeInfos);
+  }
+
+  @Override
+  public Category getCategory() {
+    return Category.UNION;
+  }
+
+  public List<MetastoreTypeInfo> getAllUnionObjectTypeInfos() {
+    return allUnionObjectTypeInfos;
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (this == other) {
+      return true;
+    }
+    if (!(other instanceof UnionMetastoreTypeInfo)) {
+      return false;
+    }
+    UnionMetastoreTypeInfo o = (UnionMetastoreTypeInfo) other;
+
+    // Compare the field types
+    return o.getAllUnionObjectTypeInfos().equals(getAllUnionObjectTypeInfos());
+  }
+
+  @Override
+  public int hashCode() {
+    return allUnionObjectTypeInfos.hashCode();
+  }
+}
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/VarcharMetastoreTypeInfo.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/VarcharMetastoreTypeInfo.java
new file mode 100644
index 0000000000000000000000000000000000000000..b45f2c2038ec6d1b479d554ba12fcdf24fe1aa86
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/type/VarcharMetastoreTypeInfo.java
@@ -0,0 +1,56 @@
+package org.apache.hadoop.hive.metastore.type;
+
+import org.apache.hadoop.hive.metastore.ColumnType;
+
+public class VarcharMetastoreTypeInfo extends BaseMetastoreCharTypeInfo {
+  private static final long serialVersionUID = 1L;
+
+  // no-arg constructor to make Kryo happy.
+  public VarcharMetastoreTypeInfo() {
+    super(ColumnType.VARCHAR_TYPE_NAME);
+  }
+
+  public VarcharMetastoreTypeInfo(int length) {
+    super(ColumnType.VARCHAR_TYPE_NAME, length);
+    validateVarcharParameter(length);
+  }
+
+  public static void validateVarcharParameter(int length) {
+    if (length > ColumnType.MAX_VARCHAR_LENGTH || length < 1) {
+      throw new RuntimeException("Varchar length " + length + " out of allowed range [1, " +
+          ColumnType.MAX_VARCHAR_LENGTH + "]");
+    }
+  }
+
+  @Override
+  public String getTypeName() {
+    return getQualifiedName();
+  }
+
+  @Override
+  public boolean equals(Object other) {
+    if (this == other) {
+      return true;
+    }
+    if (other == null || getClass() != other.getClass()) {
+      return false;
+    }
+
+    VarcharMetastoreTypeInfo pti = (VarcharMetastoreTypeInfo) other;
+
+    return this.getLength() == pti.getLength();
+  }
+
+  /**
+   * Generate the hashCode for this TypeInfo.
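+   * The type name is the same for every varchar, so the declared length
+   * alone is enough to distinguish instances.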
+ */ + @Override + public int hashCode() { + return getLength(); + } + + @Override + public String toString() { + return getQualifiedName(); + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/AvroFieldSchemaGenerator.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/AvroFieldSchemaGenerator.java new file mode 100644 index 0000000000000000000000000000000000000000..425fdd2893a950173b4f88ec3d1e7917ce722fe6 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/AvroFieldSchemaGenerator.java @@ -0,0 +1,81 @@ +package org.apache.hadoop.hive.metastore.utils; + +import org.apache.avro.Schema; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.type.MetastoreTypeInfo; +import org.apache.hadoop.hive.serde2.avro.AvroSerdeException; +import org.apache.hadoop.hive.serde2.avro.SchemaToTypeInfo; + +import java.util.ArrayList; +import java.util.List; + +public class AvroFieldSchemaGenerator { + final private List columnNames; + final private List< MetastoreTypeInfo> columnTypes; + final private List columnComments; + + public AvroFieldSchemaGenerator(Schema schema) throws AvroSerdeException { + verifySchemaIsARecord(schema); + + this.columnNames = generateColumnNames(schema); + this.columnTypes = SchemaToTypeInfo.generateColumnTypes(schema); + this.columnComments = generateColumnComments(schema); + assert columnNames.size() == columnTypes.size(); + } + + private static void verifySchemaIsARecord(Schema schema) throws AvroSerdeException { + if(!schema.getType().equals(Schema.Type.RECORD)) { + throw new AvroSerdeException("Schema for table must be of type RECORD. " + + "Received type: " + schema.getType()); + } + } + + private static List generateColumnNames(Schema schema) { + List fields = schema.getFields(); + List fieldsList = new ArrayList(fields.size()); + + for (Schema.Field field : fields) { + fieldsList.add(field.name()); + } + + return fieldsList; + } + + private static List generateColumnComments(Schema schema) { + List fields = schema.getFields(); + List fieldComments = new ArrayList(fields.size()); + + for (Schema.Field field : fields) { + String fieldComment = field.doc() == null ? "" : field.doc(); + fieldComments.add(fieldComment); + } + + return fieldComments; + } + + public List getFieldSchemas() throws AvroSerdeException { + int len = columnNames.size(); + List fieldSchemas = new ArrayList<>(len); + for(int i = 0; i getFieldsFromAvroSchema(Configuration configuration, + Properties properties) throws AvroSerdeException, IOException { + // Reset member variables so we don't get in a half-constructed state + Schema schema = null; + List columnNames = null; + List columnTypes = null; + + final String columnNameProperty = properties.getProperty(ColumnType.LIST_COLUMNS); + final String columnTypeProperty = properties.getProperty(ColumnType.LIST_COLUMN_TYPES); + final String columnCommentProperty = properties.getProperty(LIST_COLUMN_COMMENTS,""); + final String columnNameDelimiter = properties.containsKey(ColumnType.COLUMN_NAME_DELIMITER) ? 
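+ // A configurable delimiter is honored here because column names may
+ // themselves contain commas; otherwise fall back to the default comma.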
properties + .getProperty(ColumnType.COLUMN_NAME_DELIMITER) : String.valueOf(COMMA); + + if (hasExternalSchema(properties) + || columnNameProperty == null || columnNameProperty.isEmpty() + || columnTypeProperty == null || columnTypeProperty.isEmpty()) { + schema = AvroSchemaUtils.determineSchemaOrThrowException(configuration, properties); + } else { + // Get column names and sort order + columnNames = StringUtils.intern( + Arrays.asList(columnNameProperty.split(columnNameDelimiter))); + columnTypes = StorageSchemaUtils.getTypeInfosFromTypeString(columnTypeProperty); + + schema = getSchemaFromCols(properties, columnNames, columnTypes, columnCommentProperty); + properties.setProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName(), schema.toString()); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("Avro schema is " + schema); + } + + if (configuration == null) { + LOG.debug("Configuration null, not inserting schema"); + } else { + configuration.set( + AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName(), schema.toString(false)); + } + return new AvroFieldSchemaGenerator(schema).getFieldSchemas(); + } + + + private static boolean hasExternalSchema(Properties properties) { + return properties.getProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName()) != null + || properties.getProperty(AvroTableProperties.SCHEMA_URL.getPropName()) != null; + } + + public static boolean supportedCategories(MetastoreTypeInfo ti) { + final MetastoreTypeInfo.Category c = ti.getCategory(); + return c.equals(MetastoreTypeInfo.Category.PRIMITIVE) || + c.equals(MetastoreTypeInfo.Category.MAP) || + c.equals(MetastoreTypeInfo.Category.LIST) || + c.equals(MetastoreTypeInfo.Category.STRUCT) || + c.equals(MetastoreTypeInfo.Category.UNION); + } + + /** + * Attempt to determine the schema via the usual means, but do not throw + * an exception if we fail. Instead, signal failure via a special + * schema. + */ + public static Schema determineSchemaOrReturnErrorSchema(Configuration conf, Properties props) { + try { + return AvroSchemaUtils.determineSchemaOrThrowException(conf, props); + } catch(AvroSerdeException he) { + LOG.warn("Encountered AvroSerdeException determining schema. Returning " + + "signal schema to indicate problem", he); + } catch (Exception e) { + LOG.warn("Encountered exception determining schema. Returning signal " + + "schema to indicate problem", e); + } + return SchemaResolutionProblem.SIGNAL_BAD_SCHEMA; + } + + /** + * Determine the schema to that's been provided for Avro serde work. 
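+ * @param conf configuration; when non-null it receives the resolved schema
+ *             under AVRO_SERDE_SCHEMA, and it is used to open schema URLs
+ *             that point at a Hadoop FileSystem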
+ * @param properties containing a key pointing to the schema, one way or another + * @return schema to use while serdeing the avro file + * @throws IOException if error while trying to read the schema from another location + * @throws AvroSerdeException if unable to find a schema or pointer to it in the properties + */ + public static Schema determineSchemaOrThrowException(Configuration conf, Properties properties) + throws IOException, AvroSerdeException { + String schemaString = properties.getProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName()); + if(schemaString != null && !schemaString.equals(SCHEMA_NONE)) + return AvroSchemaUtils.getSchemaFor(schemaString); + + // Try pulling directly from URL + schemaString = properties.getProperty(AvroTableProperties.SCHEMA_URL.getPropName()); + if (schemaString == null) { + final String columnNameProperty = properties.getProperty(ColumnType.LIST_COLUMNS); + final String columnTypeProperty = properties.getProperty(ColumnType.LIST_COLUMN_TYPES); + final String columnCommentProperty = properties.getProperty(LIST_COLUMN_COMMENTS); + if (columnNameProperty == null || columnNameProperty.isEmpty() + || columnTypeProperty == null || columnTypeProperty.isEmpty() ) { + throw new AvroSerdeException(EXCEPTION_MESSAGE); + } + final String columnNameDelimiter = properties.containsKey(ColumnType.COLUMN_NAME_DELIMITER) ? properties + .getProperty(ColumnType.COLUMN_NAME_DELIMITER) : String.valueOf(COMMA); + // Get column names and types + List columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter)); + List columnTypes = new MetastoreTypeInfoParser(columnTypeProperty).parseTypeInfos(); + + Schema schema = getSchemaFromCols(properties, columnNames, columnTypes, columnCommentProperty); + properties.setProperty(AvroTableProperties.SCHEMA_LITERAL.getPropName(), schema.toString()); + if (conf != null) + conf.set(AvroTableProperties.AVRO_SERDE_SCHEMA.getPropName(), schema.toString(false)); + return schema; + } else if(schemaString.equals(SCHEMA_NONE)) { + throw new AvroSerdeException(EXCEPTION_MESSAGE); + } + + try { + Schema s = getSchemaFromFS(schemaString, conf); + if (s == null) { + //in case schema is not a file system + return AvroSchemaUtils.getSchemaFor(new URL(schemaString)); + } + return s; + } catch (IOException ioe) { + throw new AvroSerdeException("Unable to read schema from given path: " + schemaString, ioe); + } catch (URISyntaxException urie) { + throw new AvroSerdeException("Unable to read schema from given path: " + schemaString, urie); + } + } + + // Protected for testing and so we can pass in a conf for testing. 
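+ // Returns null (rather than throwing) when the URL's scheme is not a
+ // recognized Hadoop FileSystem, letting the caller fall back to reading
+ // the location as a plain URL.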
+ protected static Schema getSchemaFromFS(String schemaFSUrl, + Configuration conf) throws IOException, URISyntaxException { + FSDataInputStream in = null; + FileSystem fs = null; + try { + fs = FileSystem.get(new URI(schemaFSUrl), conf); + } catch (IOException ioe) { + //return null only if the file system in schema is not recognized + if (LOG.isDebugEnabled()) { + String msg = "Failed to open file system for uri " + schemaFSUrl + " assuming it is not a FileSystem url"; + LOG.debug(msg, ioe); + } + + return null; + } + try { + in = fs.open(new Path(schemaFSUrl)); + Schema s = AvroSchemaUtils.getSchemaFor(in); + return s; + } finally { + if(in != null) in.close(); + } + } + + public static Schema getSchemaFor(File file) { + Schema.Parser parser = new Schema.Parser(); + Schema schema; + try { + schema = parser.parse(file); + } catch (IOException e) { + throw new RuntimeException("Failed to parse Avro schema from " + file.getName(), e); + } + return schema; + } + + public static Schema getSchemaFor(InputStream stream) { + Schema.Parser parser = new Schema.Parser(); + Schema schema; + try { + schema = parser.parse(stream); + } catch (IOException e) { + throw new RuntimeException("Failed to parse Avro schema", e); + } + return schema; + } + + public static Schema getSchemaFor(String str) { + Schema.Parser parser = new Schema.Parser(); + Schema schema = parser.parse(str); + return schema; + } + + public static Schema getSchemaFor(URL url) { + InputStream in = null; + try { + in = url.openStream(); + return getSchemaFor(in); + } catch (Exception e) { + throw new RuntimeException("Failed to parse Avro schema", e); + } finally { + if (in != null) { + try { + in.close(); + } catch (IOException e) { + // Ignore + } + } + } + } + + public static Schema getSchemaFromCols(Properties properties, + List columnNames, List columnTypes, String columnCommentProperty) { + List columnComments; + if (columnCommentProperty == null || columnCommentProperty.isEmpty()) { + columnComments = new ArrayList(); + } else { + //Comments are separated by "\0" in columnCommentProperty, see method getSchema + //in MetaStoreUtils where this string columns.comments is generated + columnComments = Arrays.asList(columnCommentProperty.split("\0")); + + if (LOG.isDebugEnabled()) { + LOG.debug("columnComments is " + columnCommentProperty); + } + } + if (columnNames.size() != columnTypes.size()) { + throw new IllegalArgumentException("getSchemaFromCols initialization failed. Number of column " + + "name and column type differs. columnNames = " + columnNames + ", columnTypes = " + + columnTypes); + } + + final String tableName = properties.getProperty(AvroSerDeConstants.TABLE_NAME); + final String tableComment = properties.getProperty(AvroSerDeConstants.TABLE_COMMENT); + MetastoreTypeInfoToSchema metastoreTypeInfoToSchema = new MetastoreTypeInfoToSchema(); + return metastoreTypeInfoToSchema.convert(columnNames, columnTypes, columnComments, + properties.getProperty(AvroTableProperties.SCHEMA_NAMESPACE.getPropName()), + properties.getProperty(AvroTableProperties.SCHEMA_NAME.getPropName(), tableName), + properties.getProperty(AvroTableProperties.SCHEMA_DOC.getPropName(), tableComment)); + + } + + /** + * Determine if an Avro schema is of type Union[T, NULL]. Avro supports nullable + * types via a union of type T and null. This is a very common use case. + * As such, we want to silently convert it to just T and allow the value to be null. 
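+ * For example, ["null","long"] is reported as nullable here and later
+ * resolved to a plain long by getOtherTypeFromNullableType.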
+ *
+ * When a Hive union type is used with Avro, the schema type becomes
+ * Union[NULL, T1, T2, ...]. The NULL in the union should be silently removed.
+ *
+ * @return true if type represents Union[T, Null], false otherwise
+ */
+  public static boolean isNullableType(Schema schema) {
+    if (!schema.getType().equals(Schema.Type.UNION)) {
+      return false;
+    }
+
+    List<Schema> itemSchemas = schema.getTypes();
+    if (itemSchemas.size() < 2) {
+      return false;
+    }
+
+    for (Schema itemSchema : itemSchemas) {
+      if (Schema.Type.NULL.equals(itemSchema.getType())) {
+        return true;
+      }
+    }
+
+    // [null, null] is not allowed by Avro, so this check is ok.
+    return false;
+  }
+
+  /**
+   * In a nullable type, get the schema for the non-nullable type. This method
+   * does no checking that the provided Schema is nullable.
+   */
+  public static Schema getOtherTypeFromNullableType(Schema schema) {
+    List<Schema> itemSchemas = new ArrayList<>();
+    for (Schema itemSchema : schema.getTypes()) {
+      if (!Schema.Type.NULL.equals(itemSchema.getType())) {
+        itemSchemas.add(itemSchema);
+      }
+    }
+
+    if (itemSchemas.size() > 1) {
+      return Schema.createUnion(itemSchemas);
+    } else {
+      return itemSchemas.get(0);
+    }
+  }
+}
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/SchemaResolutionProblem.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/SchemaResolutionProblem.java
new file mode 100644
index 0000000000000000000000000000000000000000..3da6e812699ecc124ff1ff5528af687aad187468
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/SchemaResolutionProblem.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.apache.hadoop.hive.metastore.utils; + +import org.apache.avro.Schema; + +/** + * This class is copied from SchemaResolutionProblem from the Hive source code as part of metastore + * separation + */ +class SchemaResolutionProblem { + static final String sentinelString = "{\n" + + " \"namespace\": \"org.apache.hadoop.hive\",\n" + + " \"name\": \"CannotDetermineSchemaSentinel\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"ERROR_ERROR_ERROR_ERROR_ERROR_ERROR_ERROR\",\n" + + " \"type\":\"string\"\n" + + " },\n" + + " {\n" + + " \"name\":\"Cannot_determine_schema\",\n" + + " \"type\":\"string\"\n" + + " },\n" + + " {\n" + + " \"name\":\"check\",\n" + + " \"type\":\"string\"\n" + + " },\n" + + " {\n" + + " \"name\":\"schema\",\n" + + " \"type\":\"string\"\n" + + " },\n" + + " {\n" + + " \"name\":\"url\",\n" + + " \"type\":\"string\"\n" + + " },\n" + + " {\n" + + " \"name\":\"and\",\n" + + " \"type\":\"string\"\n" + + " },\n" + + " {\n" + + " \"name\":\"literal\",\n" + + " \"type\":\"string\"\n" + + " }\n" + + " ]\n" + + "}"; + public final static Schema SIGNAL_BAD_SCHEMA = AvroSchemaUtils.getSchemaFor(sentinelString); +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/StorageSchemaUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/StorageSchemaUtils.java new file mode 100644 index 0000000000000000000000000000000000000000..16573c9f3e3077959c900b34dd6efc4eb3cc248d --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/StorageSchemaUtils.java @@ -0,0 +1,18 @@ +package org.apache.hadoop.hive.metastore.utils; + +import org.apache.hadoop.hive.metastore.type.MetastoreTypeInfo; +import org.apache.hadoop.hive.metastore.type.MetastoreTypeInfoParser; + +import java.util.List; + +public class StorageSchemaUtils { + public static final char COMMA = ','; + public static List getTypeInfosFromTypeString(String columnTypeProperty) { + return new MetastoreTypeInfoParser(columnTypeProperty).parseTypeInfos(); + } + + private static final String FROM_STORAGE_SCHEMA_READER = "generated by storage schema reader"; + public static String determineFieldComment(String comment) { + return (comment == null) ? FROM_STORAGE_SCHEMA_READER : comment; + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/AvroSerDeConstants.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/AvroSerDeConstants.java new file mode 100644 index 0000000000000000000000000000000000000000..2bbdc00c0c2d953b42442c2b5415aaf52e8b61c6 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/AvroSerDeConstants.java @@ -0,0 +1,28 @@ +package org.apache.hadoop.hive.serde2.avro; + +/** + * This class contains some of the constants which are specific to AvroSerDe + * They should always match with the constants defined in AvroSerDe.java in Hive Source code. These + * constants were copied as part of separating metastore from Hive. 
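+ * For example, a decimal column is written into the Avro schema as
+ * {"type":"bytes","logicalType":"decimal","precision":p,"scale":s} using
+ * the AVRO_PROP_* keys below.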
+ */ +public class AvroSerDeConstants { + public static final String TABLE_NAME = "name"; + public static final String TABLE_COMMENT = "comment"; + public static final String LIST_COLUMN_COMMENTS = "columns.comments"; + + //it just so happens that the AVRO has these constants which are same as defined in ColumnType + //We should still keep it separate in case in future we need to separate the two + public static final String DECIMAL_TYPE_NAME = "decimal"; + public static final String CHAR_TYPE_NAME = "char"; + public static final String VARCHAR_TYPE_NAME = "varchar"; + public static final String DATE_TYPE_NAME = "date"; + + public static final String AVRO_TIMESTAMP_TYPE_NAME = "timestamp-millis"; + public static final String AVRO_PROP_LOGICAL_TYPE = "logicalType"; + public static final String AVRO_PROP_PRECISION = "precision"; + public static final String AVRO_PROP_SCALE = "scale"; + public static final String AVRO_PROP_MAX_LENGTH = "maxLength"; + public static final String AVRO_STRING_TYPE_NAME = "string"; + public static final String AVRO_INT_TYPE_NAME = "int"; + public static final String AVRO_LONG_TYPE_NAME = "long"; +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeException.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeException.java new file mode 100644 index 0000000000000000000000000000000000000000..bbf4cc1078f2a9d1395723c395228db865ca850d --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeException.java @@ -0,0 +1,11 @@ +package org.apache.hadoop.hive.serde2.avro; + +public class AvroSerdeException extends Exception { + public AvroSerdeException(String s, Exception ex) { + super(s, ex); + } + + public AvroSerdeException(String msg) { + super(msg); + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java new file mode 100644 index 0000000000000000000000000000000000000000..acc693c7b7cfecff08ce70fc918ed2276c0518f0 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/InstanceCache.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.avro; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +/** + * Cache for objects whose creation only depends on some other set of objects and therefore can be + * used against other equivalent versions of those objects. Essentially memoizes instance creation. + * + * @param Object that determines the instance. 
The cache uses this object as a key for + * its hash which is why it is imperative to have appropriate equals and hashcode + * implementation for this object for the cache to work properly + * @param Instance that will be created from SeedObject. + */ +public abstract class InstanceCache { + private static final Logger LOG = LoggerFactory.getLogger(InstanceCache.class); + Map cache = new HashMap(); + + public InstanceCache() {} + + /** + * Retrieve (or create if it doesn't exist) the correct Instance for this + * SeedObject + */ + public Instance retrieve(SeedObject hv) throws AvroSerdeException { + return retrieve(hv, null); + } + + /** + * Retrieve (or create if it doesn't exist) the correct Instance for this + * SeedObject using 'seenSchemas' to resolve circular references + */ + public synchronized Instance retrieve(SeedObject hv, + Set seenSchemas) throws AvroSerdeException { + if(LOG.isDebugEnabled()) LOG.debug("Checking for hv: " + hv.toString()); + + if(cache.containsKey(hv)) { + if(LOG.isDebugEnabled()) LOG.debug("Returning cache result."); + return cache.get(hv); + } + + if(LOG.isDebugEnabled()) LOG.debug("Creating new instance and storing in cache"); + + Instance instance = makeInstance(hv, seenSchemas); + cache.put(hv, instance); + return instance; + } + + protected abstract Instance makeInstance(SeedObject hv, + Set seenSchemas) throws AvroSerdeException; +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/MetastoreTypeInfoToSchema.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/MetastoreTypeInfoToSchema.java new file mode 100644 index 0000000000000000000000000000000000000000..d0548cc7582417b445de10d2366c944ec17ce0b1 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/MetastoreTypeInfoToSchema.java @@ -0,0 +1,286 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.avro; + +import org.apache.avro.Schema; +import org.apache.hadoop.hive.metastore.type.CharMetastoreTypeInfo; +import org.apache.hadoop.hive.metastore.type.DecimalMetastoreTypeInfo; +import org.apache.hadoop.hive.metastore.type.ListMetastoreTypeInfo; +import org.apache.hadoop.hive.metastore.type.MapMetastoreTypeInfo; +import org.apache.hadoop.hive.metastore.type.PrimitiveMetastoreTypeInfo; +import org.apache.hadoop.hive.metastore.type.StructMetastoreTypeInfo; +import org.apache.hadoop.hive.metastore.type.UnionMetastoreTypeInfo; +import org.apache.hadoop.hive.metastore.type.VarcharMetastoreTypeInfo; +import org.apache.hadoop.hive.metastore.utils.AvroSchemaUtils; +import org.apache.hadoop.hive.metastore.type.MetastoreTypeInfo; +import org.codehaus.jackson.JsonNode; +import org.codehaus.jackson.node.JsonNodeFactory; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.apache.hadoop.hive.metastore.type.MetastorePrimitiveTypeCategory.STRING; + +/** + * This class contains util methods to convert TypeInfo to Avro schema and vice-versa. This class + * is copied from MetastoreTypeInfoToSchema from Hive source code. + */ +public class MetastoreTypeInfoToSchema { + + private long recordCounter = 0; + + /** + * Converts Hive schema to avro schema + * + * @param columnNames Names of the hive columns + * @param columnTypes Hive Column types + * @param namespace Namespace of Avro schema + * @param name Avro schema name + * @param doc Avro schema doc + * @return Avro Schema + */ + public Schema convert(List columnNames, List columnTypes, + List columnComments, String namespace, String name, String doc) { + + List fields = new ArrayList(); + for (int i = 0; i < columnNames.size(); ++i) { + final String comment = columnComments.size() > i ? 
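+ // Tolerate a comments list shorter than the column list: a missing comment
+ // becomes null rather than an IndexOutOfBoundsException.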
columnComments.get(i) : null; + final Schema.Field avroField = createAvroField(columnNames.get(i), columnTypes.get(i), + comment); + fields.addAll(getFields(avroField)); + } + + if (name == null || name.isEmpty()) { + name = "baseRecord"; + } + + Schema avroSchema = Schema.createRecord(name, doc, namespace, false); + avroSchema.setFields(fields); + return avroSchema; + } + + private Schema.Field createAvroField(String name, MetastoreTypeInfo typeInfo, String comment) { + return new Schema.Field(name, createAvroSchema(typeInfo), comment, null); + } + + private Schema createAvroSchema(MetastoreTypeInfo typeInfo) { + Schema schema = null; + switch (typeInfo.getCategory()) { + case PRIMITIVE: + schema = createAvroPrimitive(typeInfo); + break; + case LIST: + schema = createAvroArray(typeInfo); + break; + case MAP: + schema = createAvroMap(typeInfo); + break; + case STRUCT: + schema = createAvroRecord(typeInfo); + break; + case UNION: + schema = createAvroUnion(typeInfo); + break; + } + + return wrapInUnionWithNull(schema); + } + + private Schema createAvroPrimitive(MetastoreTypeInfo typeInfo) { + PrimitiveMetastoreTypeInfo primitiveTypeInfo = (PrimitiveMetastoreTypeInfo) typeInfo; + Schema schema; + switch (primitiveTypeInfo.getPrimitiveCategory()) { + case STRING: + schema = Schema.create(Schema.Type.STRING); + break; + case CHAR: + schema = AvroSchemaUtils.getSchemaFor("{" + + "\"type\":\"" + AvroSerDeConstants.AVRO_STRING_TYPE_NAME + "\"," + + "\"logicalType\":\"" + AvroSerDeConstants.CHAR_TYPE_NAME + "\"," + + "\"maxLength\":" + ((CharMetastoreTypeInfo) typeInfo).getLength() + "}"); + break; + case VARCHAR: + schema = AvroSchemaUtils.getSchemaFor("{" + + "\"type\":\"" + AvroSerDeConstants.AVRO_STRING_TYPE_NAME + "\"," + + "\"logicalType\":\"" + AvroSerDeConstants.VARCHAR_TYPE_NAME + "\"," + + "\"maxLength\":" + ((VarcharMetastoreTypeInfo) typeInfo).getLength() + "}"); + break; + case BINARY: + schema = Schema.create(Schema.Type.BYTES); + break; + case BYTE: + schema = Schema.create(Schema.Type.INT); + break; + case SHORT: + schema = Schema.create(Schema.Type.INT); + break; + case INT: + schema = Schema.create(Schema.Type.INT); + break; + case LONG: + schema = Schema.create(Schema.Type.LONG); + break; + case FLOAT: + schema = Schema.create(Schema.Type.FLOAT); + break; + case DOUBLE: + schema = Schema.create(Schema.Type.DOUBLE); + break; + case BOOLEAN: + schema = Schema.create(Schema.Type.BOOLEAN); + break; + case DECIMAL: + DecimalMetastoreTypeInfo decimalTypeInfo = (DecimalMetastoreTypeInfo) typeInfo; + String precision = String.valueOf(decimalTypeInfo.precision()); + String scale = String.valueOf(decimalTypeInfo.scale()); + schema = AvroSchemaUtils.getSchemaFor("{" + + "\"type\":\"bytes\"," + + "\"logicalType\":\"decimal\"," + + "\"precision\":" + precision + "," + + "\"scale\":" + scale + "}"); + break; + case DATE: + schema = AvroSchemaUtils.getSchemaFor("{" + + "\"type\":\"" + AvroSerDeConstants.AVRO_INT_TYPE_NAME + "\"," + + "\"logicalType\":\"" + AvroSerDeConstants.DATE_TYPE_NAME + "\"}"); + break; + case TIMESTAMP: + schema = AvroSchemaUtils.getSchemaFor("{" + + "\"type\":\"" + AvroSerDeConstants.AVRO_LONG_TYPE_NAME + "\"," + + "\"logicalType\":\"" + AvroSerDeConstants.AVRO_TIMESTAMP_TYPE_NAME + "\"}"); + break; + case VOID: + schema = Schema.create(Schema.Type.NULL); + break; + default: + throw new UnsupportedOperationException(typeInfo + " is not supported."); + } + return schema; + } + + private Schema createAvroUnion(MetastoreTypeInfo typeInfo) { + List childSchemas = new 
ArrayList(); + for (MetastoreTypeInfo childTypeInfo : ((UnionMetastoreTypeInfo) typeInfo).getAllUnionObjectTypeInfos()) { + final Schema childSchema = createAvroSchema(childTypeInfo); + if (childSchema.getType() == Schema.Type.UNION) { + childSchemas.addAll(childSchema.getTypes()); + } else { + childSchemas.add(childSchema); + } + } + + return Schema.createUnion(removeDuplicateNullSchemas(childSchemas)); + } + + private Schema createAvroRecord(MetastoreTypeInfo typeInfo) { + List childFields = new ArrayList(); + + final List allStructFieldNames = + ((StructMetastoreTypeInfo) typeInfo).getAllStructFieldNames(); + final List allStructFieldTypeInfos = + ((StructMetastoreTypeInfo) typeInfo).getAllStructFieldTypeInfos(); + if (allStructFieldNames.size() != allStructFieldTypeInfos.size()) { + throw new IllegalArgumentException("Failed to generate avro schema from hive schema. " + + "name and column type differs. names = " + allStructFieldNames + ", types = " + + allStructFieldTypeInfos); + } + + for (int i = 0; i < allStructFieldNames.size(); ++i) { + final MetastoreTypeInfo childTypeInfo = allStructFieldTypeInfos.get(i); + final Schema.Field grandChildSchemaField = createAvroField(allStructFieldNames.get(i), + childTypeInfo, childTypeInfo.toString()); + final List grandChildFields = getFields(grandChildSchemaField); + childFields.addAll(grandChildFields); + } + + Schema recordSchema = Schema.createRecord("record_" + recordCounter, typeInfo.toString(), + null, false); + ++recordCounter; + recordSchema.setFields(childFields); + return recordSchema; + } + + private Schema createAvroMap(MetastoreTypeInfo typeInfo) { + MetastoreTypeInfo keyTypeInfo = ((MapMetastoreTypeInfo) typeInfo).getMapKeyTypeInfo(); + if (((PrimitiveMetastoreTypeInfo) keyTypeInfo).getPrimitiveCategory() + != STRING) { + throw new UnsupportedOperationException("Key of Map can only be a String"); + } + + MetastoreTypeInfo valueTypeInfo = ((MapMetastoreTypeInfo) typeInfo).getMapValueTypeInfo(); + Schema valueSchema = createAvroSchema(valueTypeInfo); + + return Schema.createMap(valueSchema); + } + + private Schema createAvroArray(MetastoreTypeInfo typeInfo) { + ListMetastoreTypeInfo listTypeInfo = (ListMetastoreTypeInfo) typeInfo; + Schema listSchema = createAvroSchema(listTypeInfo.getListElementTypeInfo()); + return Schema.createArray(listSchema); + } + + private List getFields(Schema.Field schemaField) { + List fields = new ArrayList(); + + JsonNode nullDefault = JsonNodeFactory.instance.nullNode(); + if (schemaField.schema().getType() == Schema.Type.RECORD) { + for (Schema.Field field : schemaField.schema().getFields()) { + fields.add(new Schema.Field(field.name(), field.schema(), field.doc(), nullDefault)); + } + } else { + fields.add(new Schema.Field(schemaField.name(), schemaField.schema(), schemaField.doc(), + nullDefault)); + } + + return fields; + } + + private Schema wrapInUnionWithNull(Schema schema) { + Schema wrappedSchema = schema; + switch (schema.getType()) { + case NULL: + break; + case UNION: + List existingSchemas = removeDuplicateNullSchemas(schema.getTypes()); + wrappedSchema = Schema.createUnion(existingSchemas); + break; + default: + wrappedSchema = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), schema)); + } + + return wrappedSchema; + } + + private List removeDuplicateNullSchemas(List childSchemas) { + List prunedSchemas = new ArrayList(); + boolean isNullPresent = false; + for (Schema schema : childSchemas) { + if (schema.getType() == Schema.Type.NULL) { + isNullPresent = true; + } else 
{ + prunedSchemas.add(schema); + } + } + if (isNullPresent) { + prunedSchemas.add(0, Schema.create(Schema.Type.NULL)); + } + + return prunedSchemas; + } +} \ No newline at end of file diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java new file mode 100644 index 0000000000000000000000000000000000000000..b7ca09caf91b352dd00d8c50a3d263e28456f06d --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java @@ -0,0 +1,290 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.avro; + +import static org.apache.avro.Schema.Type.BOOLEAN; +import static org.apache.avro.Schema.Type.BYTES; +import static org.apache.avro.Schema.Type.DOUBLE; +import static org.apache.avro.Schema.Type.FIXED; +import static org.apache.avro.Schema.Type.FLOAT; +import static org.apache.avro.Schema.Type.INT; +import static org.apache.avro.Schema.Type.LONG; +import static org.apache.avro.Schema.Type.NULL; +import static org.apache.avro.Schema.Type.STRING; + +import org.apache.avro.Schema; +import org.apache.hadoop.hive.metastore.ColumnType; +import org.apache.hadoop.hive.metastore.type.DecimalMetastoreTypeInfo; +import org.apache.hadoop.hive.metastore.type.MetastoreTypeInfo; +import org.apache.hadoop.hive.metastore.type.MetastoreTypeInfoFactory; +import org.apache.hadoop.hive.metastore.utils.AvroSchemaUtils; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Hashtable; +import java.util.IdentityHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Convert an Avro Schema to a Hive TypeInfo. 
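+ * Primitive Avro types map directly (e.g. long to bigint, bytes and fixed
+ * both to binary), while decimal, char, varchar, date and timestamp-millis
+ * are recognized through the "logicalType" schema property.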
This class is copied from Hive source code for + * standalone metastore + */ +public class SchemaToTypeInfo { + // Conversion of Avro primitive types to Hive primitive types + // Avro Hive + // Null + // boolean boolean check + // int int check + // long bigint check + // float double check + // double double check + // bytes binary check + // fixed binary check + // string string check + // tinyint + // smallint + + // Map of Avro's primitive types to Hives (for those that are supported by both) + private static final Map primitiveTypeToTypeInfo = initTypeMap(); + private static Map initTypeMap() { + Map theMap = new Hashtable(); + theMap.put(NULL, MetastoreTypeInfoFactory.getPrimitiveTypeInfo("void")); + theMap.put(BOOLEAN, MetastoreTypeInfoFactory.getPrimitiveTypeInfo("boolean")); + theMap.put(INT, MetastoreTypeInfoFactory.getPrimitiveTypeInfo("int")); + theMap.put(LONG, MetastoreTypeInfoFactory.getPrimitiveTypeInfo("bigint")); + theMap.put(FLOAT, MetastoreTypeInfoFactory.getPrimitiveTypeInfo("float")); + theMap.put(DOUBLE, MetastoreTypeInfoFactory.getPrimitiveTypeInfo("double")); + theMap.put(BYTES, MetastoreTypeInfoFactory.getPrimitiveTypeInfo("binary")); + theMap.put(FIXED, MetastoreTypeInfoFactory.getPrimitiveTypeInfo("binary")); + theMap.put(STRING, MetastoreTypeInfoFactory.getPrimitiveTypeInfo("string")); + return Collections.unmodifiableMap(theMap); + } + + /** + * Generate a list of of TypeInfos from an Avro schema. This method is + * currently public due to some weirdness in deserializing unions, but + * will be made private once that is resolved. + * @param schema Schema to generate field types for + * @return List of TypeInfos, each element of which is a TypeInfo derived + * from the schema. + * @throws AvroSerdeException for problems during conversion. + */ + public static List< MetastoreTypeInfo> generateColumnTypes(Schema schema) throws AvroSerdeException { + return generateColumnTypes (schema, null); + } + + /** + * Generate a list of of TypeInfos from an Avro schema. This method is + * currently public due to some weirdness in deserializing unions, but + * will be made private once that is resolved. + * @param schema Schema to generate field types for + * @param seenSchemas stores schemas processed in the parsing done so far, + * helping to resolve circular references in the schema + * @return List of TypeInfos, each element of which is a MetastoreTypeInfo derived + * from the schema. + * @throws AvroSerdeException for problems during conversion. + */ + public static List< MetastoreTypeInfo> generateColumnTypes(Schema schema, + Set seenSchemas) throws AvroSerdeException { + List fields = schema.getFields(); + + List< MetastoreTypeInfo> types = new ArrayList< MetastoreTypeInfo>(fields.size()); + + for (Schema.Field field : fields) { + types.add(generateTypeInfo(field.schema(), seenSchemas)); + } + + return types; + } + + static InstanceCache typeInfoCache = new InstanceCache() { + @Override + protected MetastoreTypeInfo makeInstance(Schema s, + Set seenSchemas) + throws AvroSerdeException { + return generateTypeInfoWorker(s, seenSchemas); + } + }; + /** + * Convert an Avro Schema into an equivalent Hive MetastoreTypeInfo. + * @param schema to record. Must be of record type. + * @param seenSchemas stores schemas processed in the parsing done so far, + * helping to resolve circular references in the schema + * @return MetastoreTypeInfo matching the Avro schema + * @throws AvroSerdeException for any problems during conversion. 
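+ * For example, {"type":"bytes","logicalType":"decimal","precision":10,
+ * "scale":2} resolves to decimal(10,2) before the cache below is consulted.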
+ */ + public static MetastoreTypeInfo generateTypeInfo(Schema schema, + Set seenSchemas) throws AvroSerdeException { + // For bytes type, it can be mapped to decimal. + Schema.Type type = schema.getType(); + if (type == BYTES && AvroSerDeConstants.DECIMAL_TYPE_NAME + .equalsIgnoreCase(schema.getProp(AvroSerDeConstants.AVRO_PROP_LOGICAL_TYPE))) { + int precision = 0; + int scale = 0; + try { + precision = schema.getJsonProp(AvroSerDeConstants.AVRO_PROP_PRECISION).getIntValue(); + scale = schema.getJsonProp(AvroSerDeConstants.AVRO_PROP_SCALE).getIntValue(); + } catch (Exception ex) { + throw new AvroSerdeException("Failed to obtain scale value from file schema: " + schema, ex); + } + + try { + DecimalMetastoreTypeInfo.validateParameter(precision, scale); + } catch (Exception ex) { + throw new AvroSerdeException("Invalid precision or scale for decimal type", ex); + } + + return MetastoreTypeInfoFactory.getDecimalTypeInfo(precision, scale); + } + + if (type == STRING && + AvroSerDeConstants.CHAR_TYPE_NAME.equalsIgnoreCase(schema.getProp(AvroSerDeConstants.AVRO_PROP_LOGICAL_TYPE))) { + int maxLength = 0; + try { + maxLength = schema.getJsonProp(AvroSerDeConstants.AVRO_PROP_MAX_LENGTH).getValueAsInt(); + } catch (Exception ex) { + throw new AvroSerdeException("Failed to obtain maxLength value from file schema: " + schema, ex); + } + return MetastoreTypeInfoFactory.getCharTypeInfo(maxLength); + } + + if (type == STRING && AvroSerDeConstants.VARCHAR_TYPE_NAME + .equalsIgnoreCase(schema.getProp(AvroSerDeConstants.AVRO_PROP_LOGICAL_TYPE))) { + int maxLength = 0; + try { + maxLength = schema.getJsonProp(AvroSerDeConstants.AVRO_PROP_MAX_LENGTH).getValueAsInt(); + } catch (Exception ex) { + throw new AvroSerdeException("Failed to obtain maxLength value from file schema: " + schema, ex); + } + return MetastoreTypeInfoFactory.getVarcharTypeInfo(maxLength); + } + + if (type == INT && + AvroSerDeConstants.DATE_TYPE_NAME.equals(schema.getProp(AvroSerDeConstants.AVRO_PROP_LOGICAL_TYPE))) { + //in case AvroSerDeConstants.DATE_TYPE_NAME matches with ColumnType.DATE_TYPE_NAME this will + //error out. This code works since they both are expected to be same. If this assumption is broken + //we need to map avro's date type to DateMetastoreTypeInfo + return MetastoreTypeInfoFactory.getPrimitiveTypeInfo(AvroSerDeConstants.DATE_TYPE_NAME); + } + + if (type == LONG && + AvroSerDeConstants.AVRO_TIMESTAMP_TYPE_NAME.equals(schema.getProp(AvroSerDeConstants.AVRO_PROP_LOGICAL_TYPE))) { + //The AVRO's timestamp type is different than the metastore's timestamp type. Use ColumnType.TIMESTAMP_TYPE_NAME + return MetastoreTypeInfoFactory.getPrimitiveTypeInfo(ColumnType.TIMESTAMP_TYPE_NAME); + } + + return typeInfoCache.retrieve(schema, seenSchemas); + } + + private static MetastoreTypeInfo generateTypeInfoWorker(Schema schema, + Set seenSchemas) throws AvroSerdeException { + // Avro requires NULLable types to be defined as unions of some type T + // and NULL. This is annoying and we're going to hide it from the user. 
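+ // For example, ["null","int"] comes out of this branch as a plain int.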
+ if(AvroSchemaUtils.isNullableType(schema)) { + return generateTypeInfo( + AvroSchemaUtils.getOtherTypeFromNullableType(schema), seenSchemas); + } + + Schema.Type type = schema.getType(); + if(primitiveTypeToTypeInfo.containsKey(type)) { + return primitiveTypeToTypeInfo.get(type); + } + + switch(type) { + case RECORD: return generateRecordTypeInfo(schema, seenSchemas); + case MAP: return generateMapTypeInfo(schema, seenSchemas); + case ARRAY: return generateArrayTypeInfo(schema, seenSchemas); + case UNION: return generateUnionTypeInfo(schema, seenSchemas); + case ENUM: return generateEnumTypeInfo(schema); + default: throw new AvroSerdeException("Do not yet support: " + schema); + } + } + + private static MetastoreTypeInfo generateRecordTypeInfo(Schema schema, + Set seenSchemas) throws AvroSerdeException { + assert schema.getType().equals(Schema.Type.RECORD); + + if (seenSchemas == null) { + seenSchemas = Collections.newSetFromMap(new IdentityHashMap()); + } else if (seenSchemas.contains(schema)) { + throw new AvroSerdeException( + "Recursive schemas are not supported. Recursive schema was " + schema + .getFullName()); + } + seenSchemas.add(schema); + + List fields = schema.getFields(); + List fieldNames = new ArrayList(fields.size()); + List< MetastoreTypeInfo> typeInfos = new ArrayList< MetastoreTypeInfo>(fields.size()); + + for(int i = 0; i < fields.size(); i++) { + fieldNames.add(i, fields.get(i).name()); + typeInfos.add(i, generateTypeInfo(fields.get(i).schema(), seenSchemas)); + } + + return MetastoreTypeInfoFactory.getStructTypeInfo(fieldNames, typeInfos); + } + + /** + * Generate a MetastoreTypeInfo for an Avro Map. This is made slightly simpler in that + * Avro only allows maps with strings for keys. + */ + private static MetastoreTypeInfo generateMapTypeInfo(Schema schema, + Set seenSchemas) throws AvroSerdeException { + assert schema.getType().equals(Schema.Type.MAP); + Schema valueType = schema.getValueType(); + MetastoreTypeInfo ti = generateTypeInfo(valueType, seenSchemas); + + return MetastoreTypeInfoFactory.getMapTypeInfo( MetastoreTypeInfoFactory.getPrimitiveTypeInfo("string"), ti); + } + + private static MetastoreTypeInfo generateArrayTypeInfo(Schema schema, + Set seenSchemas) throws AvroSerdeException { + assert schema.getType().equals(Schema.Type.ARRAY); + Schema itemsType = schema.getElementType(); + MetastoreTypeInfo itemsTypeInfo = generateTypeInfo(itemsType, seenSchemas); + + return MetastoreTypeInfoFactory.getListTypeInfo(itemsTypeInfo); + } + + private static MetastoreTypeInfo generateUnionTypeInfo(Schema schema, + Set seenSchemas) throws AvroSerdeException { + assert schema.getType().equals(Schema.Type.UNION); + List types = schema.getTypes(); + + + List< MetastoreTypeInfo> typeInfos = new ArrayList< MetastoreTypeInfo>(types.size()); + + for(Schema type : types) { + typeInfos.add(generateTypeInfo(type, seenSchemas)); + } + + return MetastoreTypeInfoFactory.getUnionTypeInfo(typeInfos); + } + + // Hive doesn't have an Enum type, so we're going to treat them as Strings. + // During the deserialize/serialize stage we'll check for enumness and + // convert as such. 
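+ // The metastore therefore reports an Avro enum column simply as "string".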
+ private static MetastoreTypeInfo generateEnumTypeInfo(Schema schema) { + assert schema.getType().equals(Schema.Type.ENUM); + + return MetastoreTypeInfoFactory.getPrimitiveTypeInfo("string"); + } +} diff --git a/standalone-metastore/src/main/sql/mysql/hive-schema-3.0.0.mysql.sql b/standalone-metastore/src/main/sql/mysql/hive-schema-3.0.0.mysql.sql index bbd56f314e716c4a186c0a7bd1582fe8abc88d99..7d5b646e3706f6765f8e495c0ce04c0f315c11d4 100644 --- a/standalone-metastore/src/main/sql/mysql/hive-schema-3.0.0.mysql.sql +++ b/standalone-metastore/src/main/sql/mysql/hive-schema-3.0.0.mysql.sql @@ -911,6 +911,13 @@ CREATE TABLE IF NOT EXISTS WM_MAPPING CONSTRAINT `WM_MAPPING_FK2` FOREIGN KEY (`POOL_ID`) REFERENCES `WM_POOL` (`POOL_ID`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1; +CREATE TABLE IF NOT EXISTS `MV_CREATION_METADATA` ( + `TBL_ID` bigint(20) NOT NULL, + `TBL_NAME` varchar(256) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL, + `LAST_TRANSACTION_INFO` mediumtext NOT NULL, + CONSTRAINT `MV_CREATION_METADATA_FK` FOREIGN KEY (`TBL_ID`) REFERENCES `TBLS` (`TBL_ID`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; + -- ---------------------------- -- Transaction and Lock Tables -- ---------------------------- diff --git a/standalone-metastore/src/main/sql/oracle/hive-schema-3.0.0.oracle.sql b/standalone-metastore/src/main/sql/oracle/hive-schema-3.0.0.oracle.sql index 62614e401b23d60ab7d9c8b023e5f5bc94fea41d..3040211705d3e8a9db6d3195ab73e266403591b8 100644 --- a/standalone-metastore/src/main/sql/oracle/hive-schema-3.0.0.oracle.sql +++ b/standalone-metastore/src/main/sql/oracle/hive-schema-3.0.0.oracle.sql @@ -635,6 +635,15 @@ CREATE TABLE WM_MAPPING ALTER TABLE WM_MAPPING ADD CONSTRAINT WM_MAPPING_PK PRIMARY KEY (MAPPING_ID); +CREATE TABLE MV_CREATION_METADATA +( + TBL_ID BIGINT NOT NULL, + TBL_NAME nvarchar(256) NOT NULL, + LAST_TRANSACTION_INFO CLOB NOT NULL +); + +ALTER TABLE MV_CREATION_METADATA ADD CONSTRAINT MV_CREATION_METADATA_FK FOREIGN KEY (TBL_ID) REFERENCES TBLS (TBL_ID); + -- Constraints for table PART_COL_PRIVS for class(es) [org.apache.hadoop.hive.metastore.model.MPartitionColumnPrivilege] ALTER TABLE PART_COL_PRIVS ADD CONSTRAINT PART_COL_PRIVS_FK1 FOREIGN KEY (PART_ID) REFERENCES PARTITIONS (PART_ID) INITIALLY DEFERRED ; diff --git a/standalone-metastore/src/main/sql/postgres/hive-schema-3.0.0.postgres.sql b/standalone-metastore/src/main/sql/postgres/hive-schema-3.0.0.postgres.sql index 91fa9a933141be5f0e32cb1c163dc2145a3531df..b6e8bed14e6e19a0eee556f307434d963b214f77 100644 --- a/standalone-metastore/src/main/sql/postgres/hive-schema-3.0.0.postgres.sql +++ b/standalone-metastore/src/main/sql/postgres/hive-schema-3.0.0.postgres.sql @@ -661,6 +661,16 @@ CREATE TABLE "WM_MAPPING" ( "ORDERING" integer ); +CREATE TABLE "MV_CREATION_METADATA" ( + "TBL_ID" BIGINT NOT NULL, + "TBL_NAME" character varying(256) NOT NULL, + "LAST_TRANSACTION_INFO" TEXT NOT NULL +); + +ALTER TABLE ONLY "MV_CREATION_METADATA" + ADD CONSTRAINT "MV_CREATION_METADATA_FK" FOREIGN KEY ("TBL_ID") REFERENCES "TBLS"("TBL_ID") DEFERRABLE; + + -- -- Name: BUCKETING_COLS_pkey; Type: CONSTRAINT; Schema: public; Owner: hiveuser; Tablespace: -- diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/schema/reader/TestDefaultStorageSchemaReader.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/schema/reader/TestDefaultStorageSchemaReader.java new file mode 100644 index 0000000000000000000000000000000000000000..24c5447d350afb719a43c0c2ccc8281ec458c06b --- /dev/null +++ 
b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/schema/reader/TestDefaultStorageSchemaReader.java @@ -0,0 +1,578 @@ +package org.apache.hadoop.hive.metastore.schema.reader; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.DefaultStorageSchemaReader; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.MetaStoreTestUtils; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.InvalidOperationException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.Type; +import org.apache.hadoop.hive.metastore.client.builder.TableBuilder; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.utils.AvroSchemaUtils; +import org.apache.hadoop.util.StringUtils; +import org.apache.thrift.TException; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; + +public class TestDefaultStorageSchemaReader { + private static final Logger LOG = LoggerFactory.getLogger(TestDefaultStorageSchemaReader.class); + private static final String TEST_DB_NAME = "TEST_DB"; + private static final String TEST_TABLE_NAME = "TEST_TABLE"; + private HiveMetaStoreClient client; + private Configuration conf; + private Warehouse warehouse; + private static final int DEFAULT_LIMIT_PARTITION_REQUEST = 100; + private static final String AVRO_SERIALIZATION_LIB = + "org.apache.hadoop.hive.serde2.avro.AvroSerDe"; + + // These schemata are used in other tests + static public final String MAP_WITH_PRIMITIVE_VALUE_TYPE = "{\n" + + " \"namespace\": \"testing\",\n" + + " \"name\": \"oneMap\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"aMap\",\n" + + " \"type\":{\"type\":\"map\",\n" + + " \"values\":\"long\"}\n" + + "\t}\n" + + " ]\n" + + "}"; + static public final String ARRAY_WITH_PRIMITIVE_ELEMENT_TYPE = "{\n" + + " \"namespace\": \"testing\",\n" + + " \"name\": \"oneArray\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"anArray\",\n" + + " \"type\":{\"type\":\"array\",\n" + + " \"items\":\"string\"}\n" + + "\t}\n" + + " ]\n" + + "}"; + public static final String RECORD_SCHEMA = "{\n" + + " \"namespace\": \"testing.test.mctesty\",\n" + + " \"name\": \"oneRecord\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"aRecord\",\n" + + " \"type\":{\"type\":\"record\",\n" + + " \"name\":\"recordWithinARecord\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"int1\",\n" + + " \"type\":\"int\"\n" + + " },\n" + + " {\n" + + " \"name\":\"boolean1\",\n" + + " \"type\":\"boolean\"\n" + + " },\n" + + " {\n" + + " \"name\":\"long1\",\n" + + " \"type\":\"long\"\n" + + " }\n" + + " ]}\n" + + " }\n" + + " ]\n" + + "}"; + public static final String NULLABLE_RECORD_SCHEMA = "[\"null\", " + RECORD_SCHEMA + "]"; + public static final String UNION_SCHEMA = 
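+ // A union with no null branch: the nullable-union shortcut does not apply,
+ // so this schema should surface as a genuine Hive union type.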
"{\n" + + " \"namespace\": \"test.a.rossa\",\n" + + " \"name\": \"oneUnion\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"aUnion\",\n" + + " \"type\":[\"int\", \"string\"]\n" + + " }\n" + + " ]\n" + + "}"; + public static final String UNION_SCHEMA_2 = "{\n" + + " \"namespace\": \"test.a.rossa\",\n" + + " \"name\": \"oneUnion\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"aUnion\",\n" + + " \"type\":[\"null\", \"int\", \"string\"]\n" + + " }\n" + + " ]\n" + + "}"; + public static final String UNION_SCHEMA_3 = "{\n" + + " \"namespace\": \"test.a.rossa\",\n" + + " \"name\": \"oneUnion\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"aUnion\",\n" + + " \"type\":[\"float\",\"int\"]\n" + + " }\n" + + " ]\n" + + "}"; + public static final String UNION_SCHEMA_4 = "{\n" + + " \"namespace\": \"test.a.rossa\",\n" + + " \"name\": \"oneUnion\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"aUnion\",\n" + + " \"type\":[\"int\",\"float\",\"long\"]\n" + + " }\n" + + " ]\n" + + "}"; + public static final String ENUM_SCHEMA = "{\n" + + " \"namespace\": \"clever.namespace.name.in.space\",\n" + + " \"name\": \"oneEnum\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"baddies\",\n" + + " \"type\":{\"type\":\"enum\",\"name\":\"villians\", \"symbols\": " + + "[\"DALEKS\", \"CYBERMEN\", \"SLITHEEN\", \"JAGRAFESS\"]}\n" + + " \n" + + " \n" + + " }\n" + + " ]\n" + + "}"; + public static final String FIXED_SCHEMA = "{\n" + + " \"namespace\": \"ecapseman\",\n" + + " \"name\": \"oneFixed\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"hash\",\n" + + " \"type\":{\"type\": \"fixed\", \"name\": \"MD5\", \"size\": 16}\n" + + " }\n" + + " ]\n" + + "}"; + public static final String NULLABLE_STRING_SCHEMA = "{\n" + + " \"type\": \"record\", \n" + + " \"name\": \"nullableUnionTest\",\n" + + " \"fields\" : [\n" + + " {\"name\":\"nullableString\", \"type\":[\"null\", \"string\"]}\n" + + " ]\n" + + "}"; + public static final String MAP_WITH_NULLABLE_PRIMITIVE_VALUE_TYPE_SCHEMA = "{\n" + + " \"namespace\": \"testing\",\n" + + " \"name\": \"mapWithNullableUnionTest\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"aMap\",\n" + + " \"type\":{\"type\":\"map\",\n" + + " \"values\":[\"null\",\"long\"]}\n" + + "\t}\n" + + " ]\n" + + "}"; + public static final String NULLABLE_ENUM_SCHEMA = "{\n" + + " \"namespace\": \"clever.namespace.name.in.space\",\n" + + " \"name\": \"nullableUnionTest\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"nullableEnum\",\n" + + " \"type\": [\"null\", {\"type\":\"enum\",\"name\":\"villians\", \"symbols\": " + + "[\"DALEKS\", \"CYBERMEN\", \"SLITHEEN\", \"JAGRAFESS\"]}]\n" + + " \n" + + " \n" + + " }\n" + + " ]\n" + + "}"; + public static final String BYTES_SCHEMA = "{\n" + + " \"type\": \"record\", \n" + + " \"name\": \"bytesTest\",\n" + + " \"fields\" : [\n" + + " {\"name\":\"bytesField\", \"type\":\"bytes\"}\n" + + " ]\n" + + "}"; + + public static final String KITCHEN_SINK_SCHEMA = "{\n" + + " \"namespace\": \"org.apache.hadoop.hive\",\n" + + " \"name\": \"kitchsink\",\n" + + " \"type\": \"record\",\n" + + " \"fields\": [\n" + + " {\n" + + " \"name\":\"string1\",\n" + + " \"type\":\"string\"\n" + + " },\n" + + " {\n" + + " \"name\":\"string2\",\n" + + " \"type\":\"string\"\n" + + " },\n" + + " 
{\n" + + " \"name\":\"int1\",\n" + + " \"type\":\"int\"\n" + + " },\n" + + " {\n" + + " \"name\":\"boolean1\",\n" + + " \"type\":\"boolean\"\n" + + " },\n" + + " {\n" + + " \"name\":\"long1\",\n" + + " \"type\":\"long\"\n" + + " },\n" + + " {\n" + + " \"name\":\"float1\",\n" + + " \"type\":\"float\"\n" + + " },\n" + + " {\n" + + " \"name\":\"double1\",\n" + + " \"type\":\"double\"\n" + + " },\n" + + " {\n" + + " \"name\":\"inner_record1\",\n" + + " \"type\":{ \"type\":\"record\",\n" + + " \"name\":\"inner_record1_impl\",\n" + + " \"fields\": [\n" + + " {\"name\":\"int_in_inner_record1\",\n" + + " \"type\":\"int\"},\n" + + " {\"name\":\"string_in_inner_record1\",\n" + + " \"type\":\"string\"}\n" + + " ]\n" + + " }\n" + + " },\n" + + " {\n" + + " \"name\":\"enum1\",\n" + + " \"type\":{\"type\":\"enum\", \"name\":\"enum1_values\", " + + "\"symbols\":[\"ENUM1_VALUES_VALUE1\",\"ENUM1_VALUES_VALUE2\", \"ENUM1_VALUES_VALUE3\"]}\n" + + " },\n" + + " {\n" + + " \"name\":\"array1\",\n" + + " \"type\":{\"type\":\"array\", \"items\":\"string\"}\n" + + " },\n" + + " {\n" + + " \"name\":\"map1\",\n" + + " \"type\":{\"type\":\"map\", \"values\":\"string\"}\n" + + " },\n" + + " {\n" + + " \"name\":\"union1\",\n" + + " \"type\":[\"float\", \"boolean\", \"string\"]\n" + + " },\n" + + " {\n" + + " \"name\":\"fixed1\",\n" + + " \"type\":{\"type\":\"fixed\", \"name\":\"fourbytes\", \"size\":4}\n" + + " },\n" + + " {\n" + + " \"name\":\"null1\",\n" + + " \"type\":\"null\"\n" + + " },\n" + + " {\n" + + " \"name\":\"UnionNullInt\",\n" + + " \"type\":[\"int\", \"null\"]\n" + + " },\n" + + " {\n" + + " \"name\":\"bytes1\",\n" + + " \"type\":\"bytes\"\n" + + " }\n" + + " ]\n" + + "}"; + + @Before + public void setUp() throws Exception { + conf = MetastoreConf.newMetastoreConf(); + warehouse = new Warehouse(conf); + + // set some values to use for getting conf. 
vars + MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.METRICS_ENABLED, true); + conf.set("hive.key1", "value1"); + conf.set("hive.key2", "http://www.example.com"); + conf.set("hive.key3", ""); + conf.set("hive.key4", "0"); + conf.set("datanucleus.autoCreateTables", "false"); + + MetaStoreTestUtils.setConfForStandloneMode(conf); + MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX, 2); + MetastoreConf.setLongVar(conf, MetastoreConf.ConfVars.LIMIT_PARTITION_REQUEST, + DEFAULT_LIMIT_PARTITION_REQUEST); + MetastoreConf.setVar(conf, MetastoreConf.ConfVars.STORAGE_SCHEMA_READER_IMPL, + DefaultStorageSchemaReader.class.getName()); + client = createClient(); + } + + @After + public void closeClient() { + client.close(); + } + + private void silentDropDatabase(String dbName) throws TException { + try { + for (String tableName : client.getTables(dbName, "*")) { + client.dropTable(dbName, tableName); + } + client.dropDatabase(dbName); + } catch (NoSuchObjectException | InvalidOperationException e) { + // NOP + } + } + + private HiveMetaStoreClient createClient() throws Exception { + try { + return new HiveMetaStoreClient(conf); + } catch (Throwable e) { + System.err.println("Unable to open the metastore"); + System.err.println(StringUtils.stringifyException(e)); + throw new Exception(e); + } + } + + @Test + public void testSimpleAvroTable() throws TException, IOException { + List<FieldSchema> fields = new ArrayList<>(2); + FieldSchema field = new FieldSchema(); + field.setName("name"); + field.setType("string"); + field.setComment("Test name comment"); + fields.add(field); + + field = new FieldSchema(); + field.setName("age"); + field.setType("int"); + field.setComment("Test age comment"); + fields.add(field); + + createTable(TEST_DB_NAME, TEST_TABLE_NAME, AVRO_SERIALIZATION_LIB, fields, null); + List<FieldSchema> retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + verifyTableFields(fields, retFields, null); + } + + private Table createTable(String dbName, String tblName, String serializationLib, + List<FieldSchema> fields, Map<String, String> tblProperties) throws TException, IOException { + client.dropTable(dbName, tblName); + silentDropDatabase(dbName); + Database db = new Database(); + db.setName(dbName); + client.createDatabase(db); + db = client.getDatabase(dbName); + Path dbPath = new Path(db.getLocationUri()); + FileSystem fs = FileSystem.get(dbPath.toUri(), conf); + String typeName = "dummy"; + client.dropType(typeName); + Type typ1 = new Type(); + typ1.setName(typeName); + typ1.setFields(fields); + client.createType(typ1); + + Table tbl = new TableBuilder().setDbName(dbName).setTableName(tblName).setCols(typ1.getFields()) + .setSerdeLib(serializationLib).setTableParams(tblProperties).build(); + client.createTable(tbl); + return client.getTable(dbName, tblName); + } + + @Test + public void testExternalSchemaAvroTable() throws TException, IOException { + //map + createAvroTableWithExternalSchema(TEST_DB_NAME, TEST_TABLE_NAME, MAP_WITH_PRIMITIVE_VALUE_TYPE); + List<FieldSchema> retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + Assert.assertEquals("Unexpected number of fields", 1, retFields.size()); + Assert.assertEquals("Unexpected name of the field", "aMap", retFields.get(0).getName()); + Assert.assertEquals("Unexpected type of the field", "map<string,bigint>", + retFields.get(0).getType()); + Assert.assertEquals("Unexpected comment of the field", "", retFields.get(0).getComment()); + + //list + createAvroTableWithExternalSchema(TEST_DB_NAME, TEST_TABLE_NAME, + ARRAY_WITH_PRIMITIVE_ELEMENT_TYPE); + retFields =
client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + Assert.assertEquals("Unexpected number of fields", 1, retFields.size()); + Assert.assertEquals("Unexpected name of the field", "anArray", retFields.get(0).getName()); + Assert + .assertEquals("Unexpected type of the field", "array<string>", retFields.get(0).getType()); + + //struct + createAvroTableWithExternalSchema(TEST_DB_NAME, TEST_TABLE_NAME, RECORD_SCHEMA); + retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + Assert.assertEquals("Unexpected number of fields", 1, retFields.size()); + Assert.assertEquals("Unexpected name of the field", "aRecord", retFields.get(0).getName()); + Assert.assertEquals("Unexpected type of the field", + "struct<int1:int,boolean1:boolean,long1:bigint>", retFields.get(0).getType()); + + //union + createAvroTableWithExternalSchema(TEST_DB_NAME, TEST_TABLE_NAME, UNION_SCHEMA); + retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + Assert.assertEquals("Unexpected number of fields", 1, retFields.size()); + Assert.assertEquals("Unexpected name of the field", "aUnion", retFields.get(0).getName()); + Assert.assertEquals("Unexpected type of the field", "uniontype<int,string>", + retFields.get(0).getType()); + + //union-2 + createAvroTableWithExternalSchema(TEST_DB_NAME, TEST_TABLE_NAME, UNION_SCHEMA_2); + retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + Assert.assertEquals("Unexpected number of fields", 1, retFields.size()); + Assert.assertEquals("Unexpected name of the field", "aUnion", retFields.get(0).getName()); + Assert.assertEquals("Unexpected type of the field", "uniontype<int,string>", + retFields.get(0).getType()); + + //union_3 + createAvroTableWithExternalSchema(TEST_DB_NAME, TEST_TABLE_NAME, UNION_SCHEMA_3); + retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + Assert.assertEquals("Unexpected number of fields", 1, retFields.size()); + Assert.assertEquals("Unexpected name of the field", "aUnion", retFields.get(0).getName()); + Assert.assertEquals("Unexpected type of the field", "uniontype<float,int>", + retFields.get(0).getType()); + + //union_4 + createAvroTableWithExternalSchema(TEST_DB_NAME, TEST_TABLE_NAME, UNION_SCHEMA_4); + retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + Assert.assertEquals("Unexpected number of fields", 1, retFields.size()); + Assert.assertEquals("Unexpected name of the field", "aUnion", retFields.get(0).getName()); + Assert.assertEquals("Unexpected type of the field", "uniontype<int,float,bigint>", + retFields.get(0).getType()); + + //enum + // Enums are one of two Avro types that Hive doesn't have any native support for. + // Column names - we lose the enumness of this schema + // Column types become string + createAvroTableWithExternalSchema(TEST_DB_NAME, TEST_TABLE_NAME, ENUM_SCHEMA); + retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + Assert.assertEquals("Unexpected number of fields", 1, retFields.size()); + Assert.assertEquals("Unexpected name of the field", "baddies", retFields.get(0).getName()); + Assert.assertEquals("Unexpected type of the field", "string", + retFields.get(0).getType()); + + // Hive has no concept of Avro's fixed type.
Fixed -> arrays of bytes + createAvroTableWithExternalSchema(TEST_DB_NAME, TEST_TABLE_NAME, FIXED_SCHEMA); + retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + Assert.assertEquals("Unexpected number of fields", 1, retFields.size()); + Assert.assertEquals("Unexpected name of the field", "hash", retFields.get(0).getName()); + Assert.assertEquals("Unexpected type of the field", "binary", + retFields.get(0).getType()); + + //nullable string + createAvroTableWithExternalSchema(TEST_DB_NAME, TEST_TABLE_NAME, NULLABLE_STRING_SCHEMA); + retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + Assert.assertEquals("Unexpected number of fields", 1, retFields.size()); + Assert.assertEquals("Unexpected name of the field", "nullableString", retFields.get(0).getName()); + Assert.assertEquals("Unexpected type of the field", "string", + retFields.get(0).getType()); + + //map with nullable value - That Union[T, NULL] is converted to just T, within a Map + createAvroTableWithExternalSchema(TEST_DB_NAME, TEST_TABLE_NAME, MAP_WITH_NULLABLE_PRIMITIVE_VALUE_TYPE_SCHEMA); + retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + Assert.assertEquals("Unexpected number of fields", 1, retFields.size()); + Assert.assertEquals("Unexpected name of the field", "aMap", retFields.get(0).getName()); + Assert.assertEquals("Unexpected type of the field", "map<string,bigint>", + retFields.get(0).getType()); + + // That Union[T, NULL] is converted to just T. + createAvroTableWithExternalSchema(TEST_DB_NAME, TEST_TABLE_NAME, NULLABLE_ENUM_SCHEMA); + retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + Assert.assertEquals("Unexpected number of fields", 1, retFields.size()); + Assert.assertEquals("Unexpected name of the field", "nullableEnum", retFields.get(0).getName()); + Assert.assertEquals("Unexpected type of the field", "string", + retFields.get(0).getType()); + + createAvroTableWithExternalSchema(TEST_DB_NAME, TEST_TABLE_NAME, BYTES_SCHEMA); + retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + Assert.assertEquals("Unexpected number of fields", 1, retFields.size()); + Assert.assertEquals("Unexpected name of the field", "bytesField", retFields.get(0).getName()); + Assert.assertEquals("Unexpected type of the field", "binary", + retFields.get(0).getType()); + + createAvroTableWithExternalSchema(TEST_DB_NAME, TEST_TABLE_NAME, KITCHEN_SINK_SCHEMA); + retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + Assert.assertEquals("Unexpected number of fields", 16, retFields.size()); + //There are 16 fields in this schema.
Instead of verifying all we verify the interesting ones + //(ones which have not been tested above) + Assert + .assertEquals("Unexpected name of 8th field", "inner_record1", retFields.get(7).getName()); + Assert.assertEquals("Unexpected type of the field", + "struct<int_in_inner_record1:int,string_in_inner_record1:string>", + retFields.get(7).getType()); + + Assert.assertEquals("Unexpected field name of the 10th field", "array1", + retFields.get(9).getName()); + Assert.assertEquals("Unexpected field type of the 10th field", "array<string>", + retFields.get(9).getType()); + + Assert.assertEquals("Unexpected field name of the 11th field", "map1", + retFields.get(10).getName()); + Assert.assertEquals("Unexpected field type of the 11th field", "map<string,string>", + retFields.get(10).getType()); + + Assert.assertEquals("Unexpected field name of the 12th field", "union1", + retFields.get(11).getName()); + Assert + .assertEquals("Unexpected field type of the 12th field", "uniontype<float,boolean,string>", + retFields.get(11).getType()); + + Assert.assertEquals("Unexpected field name of the 14th field", "null1", + retFields.get(13).getName()); + Assert.assertEquals("Unexpected field type of the 14th field", "void", + retFields.get(13).getType()); + + Assert.assertEquals("Unexpected field name of the 15th field", "UnionNullInt", + retFields.get(14).getName()); + Assert.assertEquals("Unexpected field type of the 15th field", "int", + retFields.get(14).getType()); + } + + private void createAvroTableWithExternalSchema(String dbName, String tblName, String schemaStr) + throws TException, IOException { + List<FieldSchema> fields = new ArrayList<>(0); + Map<String, String> tblParams = new HashMap<>(); + tblParams.put(AvroSchemaUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(), schemaStr); + createTable(dbName, tblName, AVRO_SERIALIZATION_LIB, fields, tblParams); + } + + @Test + public void testSimpleTable() throws TException, IOException { + List<FieldSchema> fields = new ArrayList<>(2); + FieldSchema field = new FieldSchema(); + field.setName("name"); + field.setType("string"); + field.setComment("Test name comment"); + fields.add(field); + + field = new FieldSchema(); + field.setName("age"); + field.setType("int"); + field.setComment("Test age comment"); + fields.add(field); + + createTable(TEST_DB_NAME, TEST_TABLE_NAME, null, fields, null); + List<FieldSchema> retFields = client.getFields(TEST_DB_NAME, TEST_TABLE_NAME); + verifyTableFields(fields, retFields, null); + } + + private void verifyTableFields(List<FieldSchema> expected, List<FieldSchema> actual, + String nullCommentText) { + Assert.assertEquals(expected.size(), actual.size()); + int size = expected.size(); + for (int i = 0; i < size; i++) { + FieldSchema expectedField = expected.get(i); + FieldSchema actualField = actual.get(i); + Assert.assertEquals("Name does not match for field " + (i + 1), expectedField.getName(), + actualField.getName()); + Assert.assertEquals("Type does not match for field " + (i + 1), expectedField.getType(), + actualField.getType()); + String expectedComment = null; + if (expectedField.getComment() == null && nullCommentText != null) { + expectedComment = nullCommentText; + } else { + expectedComment = expectedField.getComment(); + } + Assert.assertEquals("Comment does not match for field " + (i + 1), expectedComment, + actualField.getComment()); + } + } +}
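The assertions above encode the Avro-to-Hive type mapping: nullable unions collapse to their non-null branch, Avro long surfaces as Hive bigint, and enum/fixed degrade to string/binary. A minimal sketch (illustration, not part of the diff) of exercising that mapping directly through the schema-reading utility that AvroStorageSchemaReader delegates to; the class name AvroFieldMappingSketch is hypothetical, and a default Configuration plus the standalone-metastore jars on the classpath are assumed:

    import java.util.List;
    import java.util.Properties;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.metastore.api.FieldSchema;
    import org.apache.hadoop.hive.metastore.utils.AvroSchemaUtils;

    public class AvroFieldMappingSketch {
      public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        // Same shape as MAP_WITH_NULLABLE_PRIMITIVE_VALUE_TYPE_SCHEMA above: map values are [null, long].
        props.setProperty(AvroSchemaUtils.AvroTableProperties.SCHEMA_LITERAL.getPropName(),
            "{\"type\":\"record\",\"name\":\"r\",\"fields\":[{\"name\":\"aMap\"," +
            "\"type\":{\"type\":\"map\",\"values\":[\"null\",\"long\"]}}]}");
        List<FieldSchema> fields = AvroSchemaUtils.getFieldsFromAvroSchema(new Configuration(), props);
        // Union[null, long] collapses to long, and Avro long maps to Hive bigint,
        // so this should print: aMap : map<string,bigint>
        for (FieldSchema field : fields) {
          System.out.println(field.getName() + " : " + field.getType());
        }
      }
    }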
diff --git a/storage-api/serde-api/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeInfoValidationUtils.java b/storage-api/serde-api/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeInfoValidationUtils.java new file mode 100644 index 0000000000000000000000000000000000000000..7290192d04ff9ba547d2b1b02bc41c02f2ba4dfb --- /dev/null +++ b/storage-api/serde-api/src/java/org/apache/hadoop/hive/serde2/typeinfo/PrimitiveTypeInfoValidationUtils.java @@ -0,0 +1,35 @@ +package org.apache.hadoop.hive.serde2.typeinfo; + +import org.apache.hadoop.hive.serde.serdeConstants; + +public class PrimitiveTypeInfoValidationUtils { + public static void validateVarcharParameter(int length) { + if (length > serdeConstants.MAX_VARCHAR_LENGTH || length < 1) { + throw new RuntimeException("Varchar length " + length + " out of allowed range [1, " + + serdeConstants.MAX_VARCHAR_LENGTH + "]"); + } + } + + public static void validateCharParameter(int length) { + if (length > serdeConstants.MAX_CHAR_LENGTH || length < 1) { + throw new RuntimeException("Char length " + length + " out of allowed range [1, " + + serdeConstants.MAX_CHAR_LENGTH + "]"); + } + } + + public static void validateParameter(int precision, int scale) { + if (precision < 1 || precision > serdeConstants.MAX_PRECISION) { + throw new IllegalArgumentException("Decimal precision out of allowed range [1," + + serdeConstants.MAX_PRECISION + "]"); + } + + if (scale < 0 || scale > serdeConstants.MAX_SCALE) { + throw new IllegalArgumentException("Decimal scale out of allowed range [0," + + serdeConstants.MAX_SCALE + "]"); + } + + if (precision < scale) { + throw new IllegalArgumentException("Decimal scale must be less than or equal to precision"); + } + } +} \ No newline at end of file
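A quick illustration of the bounds the validator above enforces (ValidationSketch is a hypothetical harness; the serdeConstants limits are assumed to match the values added to ColumnType in this patch: varchar 65535, char 255, precision/scale 38):

    import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfoValidationUtils;

    public class ValidationSketch {
      public static void main(String[] args) {
        PrimitiveTypeInfoValidationUtils.validateVarcharParameter(65535); // ok: at the varchar maximum
        PrimitiveTypeInfoValidationUtils.validateParameter(10, 2);        // ok: decimal(10,2)
        try {
          PrimitiveTypeInfoValidationUtils.validateParameter(4, 6);       // scale exceeds precision
        } catch (IllegalArgumentException expected) {
          System.out.println(expected.getMessage()); // "Decimal scale must be less than or equal to precision"
        }
      }
    }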
diff --git a/storage-api/serde-api/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoParser.java b/storage-api/serde-api/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoParser.java new file mode 100644 index 0000000000000000000000000000000000000000..cd0a4230ee3db0ad9f32e2ac17296480b9f3e2b9 --- /dev/null +++ b/storage-api/serde-api/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoParser.java @@ -0,0 +1,338 @@ +package org.apache.hadoop.hive.serde2.typeinfo; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveTypeEntry; + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +/** + * Parse a recursive TypeInfo list String. For example, the following inputs + * are valid inputs: + * "int,string,map<string,string>,list<map<int,list<string>>>,list<struct<a:int,b:string>>" + * The separators between TypeInfos can be ",", ":", or ";". + * + * In order to use this class: TypeInfoParser parser = new + * TypeInfoParser("int,string"); ArrayList<TypeInfo> typeInfos = + * parser.parseTypeInfos(); + */ +@InterfaceStability.Evolving +@InterfaceAudience.LimitedPrivate({"Standalone Metastore", "Hive"}) +public class TypeInfoParser { + + /** + * Make some of the TypeInfo parsing available as a utility. + */ + public static PrimitiveParts parsePrimitiveParts(String typeInfoString) { + TypeInfoParser parser = new TypeInfoParser(typeInfoString); + return parser.parsePrimitiveParts(); + } + + private static class Token { + public int position; + public String text; + public boolean isType; + + @Override + public String toString() { + return "" + position + ":" + text; + } + }; + + private static boolean isTypeChar(char c) { + return Character.isLetterOrDigit(c) || c == '_' || c == '.' || c == ' ' || c == '$'; + } + + /** + * Tokenize the typeInfoString. The rule is simple: all consecutive + * alphadigits and '_', '.' are in one token, and all other characters are + * one character per token. + * + * tokenize("map<int,string>") should return + * ["map","<","int",",","string",">"] + * + * Note that we add '$' in new Calcite return path. As '$' will not appear + * in any type in Hive, it is safe to do so. + */ + private static ArrayList<Token> tokenize(String typeInfoString) { + ArrayList<Token> tokens = new ArrayList<>(0); + int begin = 0; + int end = 1; + while (end <= typeInfoString.length()) { + // last character ends a token? + // if there are quotes, all the text between the quotes + // is considered a single token (this can happen for + // timestamp with local time-zone) + if (begin > 0 && + typeInfoString.charAt(begin - 1) == '(' && + typeInfoString.charAt(begin) == '\'') { + // Ignore starting quote + begin++; + do { + end++; + } while (typeInfoString.charAt(end) != '\''); + } else if (typeInfoString.charAt(begin) == '\'' && + typeInfoString.charAt(begin + 1) == ')') { + // Ignore closing quote + begin++; + end++; + } + if (end == typeInfoString.length() + || !isTypeChar(typeInfoString.charAt(end - 1)) + || !isTypeChar(typeInfoString.charAt(end))) { + Token t = new Token(); + t.position = begin; + t.text = typeInfoString.substring(begin, end); + t.isType = isTypeChar(typeInfoString.charAt(begin)); + tokens.add(t); + begin = end; + } + end++; + } + return tokens; + } + + public TypeInfoParser(String typeInfoString) { + this.typeInfoString = typeInfoString; + typeInfoTokens = tokenize(typeInfoString); + } + + private final String typeInfoString; + private final ArrayList<Token> typeInfoTokens; + private ArrayList<TypeInfo> typeInfos; + private int iToken; + + public ArrayList<TypeInfo> parseTypeInfos() { + typeInfos = new ArrayList<>(); + iToken = 0; + while (iToken < typeInfoTokens.size()) { + typeInfos.add(parseType()); + if (iToken < typeInfoTokens.size()) { + Token separator = typeInfoTokens.get(iToken); + if (",".equals(separator.text) || ";".equals(separator.text) + || ":".equals(separator.text)) { + iToken++; + } else { + throw new IllegalArgumentException( + "Error: ',', ':', or ';' expected at position " + + separator.position + " from '" + typeInfoString + "' " + + typeInfoTokens); + } + } + } + return typeInfos; + } + + private Token peek() { + if (iToken < typeInfoTokens.size()) { + return typeInfoTokens.get(iToken); + } else { + return null; + } + } + + private Token expect(String item) { + return expect(item, null); + } + + private Token expect(String item, String alternative) { + if (iToken >= typeInfoTokens.size()) { + throw new IllegalArgumentException("Error: " + item + + " expected at the end of '" + typeInfoString + "'"); + } + Token t = typeInfoTokens.get(iToken); + if (item.equals("type")) { + if (!serdeConstants.LIST_TYPE_NAME.equals(t.text) + && !serdeConstants.MAP_TYPE_NAME.equals(t.text) + && !serdeConstants.STRUCT_TYPE_NAME.equals(t.text) + && !serdeConstants.UNION_TYPE_NAME.equals(t.text) + && null == PrimitiveTypeEntry.fromTypeName(t.text) + && !t.text.equals(alternative)) { + throw new IllegalArgumentException("Error: " + item + + " expected at the position " + t.position + " of '" + + typeInfoString + "' but '" + t.text + "' is found."); + } + } else if (item.equals("name")) { + if (!t.isType && !t.text.equals(alternative)) { + throw new IllegalArgumentException("Error: " + item + + " expected at the position " + t.position + " of '" + + typeInfoString + "' but '" + t.text + "' is found."); + } + } else { + if (!item.equals(t.text) &&
!t.text.equals(alternative)) { + throw new IllegalArgumentException("Error: " + item + + " expected at the position " + t.position + " of '" + + typeInfoString + "' but '" + t.text + "' is found."); + } + } + iToken++; + return t; + } + + private String[] parseParams() { + List<String> params = new LinkedList<>(); + + Token t = peek(); + if (t != null && t.text.equals("(")) { + expect("("); + + // checking for null in the for-loop condition prevents null-ptr exception + // and allows us to fail more gracefully with a parsing error. + for(t = peek(); (t == null) || !t.text.equals(")"); t = expect(",",")")) { + params.add(expect("name").text); + } + if (params.size() == 0) { + throw new IllegalArgumentException( + "type parameters expected for type string " + typeInfoString); + } + } + + return params.toArray(new String[params.size()]); + } + + private TypeInfo parseType() { + + Token t = expect("type"); + + // Is this a primitive type? + PrimitiveTypeEntry typeEntry = + PrimitiveTypeEntry.fromTypeName(t.text); + if (typeEntry != null && typeEntry.primitiveCategory != PrimitiveObjectInspector.PrimitiveCategory.UNKNOWN) { + String[] params = parseParams(); + switch (typeEntry.primitiveCategory) { + case CHAR: + case VARCHAR: + if (params == null || params.length == 0) { + throw new IllegalArgumentException(typeEntry.typeName + + " type is specified without length: " + typeInfoString); + } + + int length = 1; + if (params.length == 1) { + length = Integer.parseInt(params[0]); + if (typeEntry.primitiveCategory == PrimitiveObjectInspector.PrimitiveCategory.VARCHAR) { + PrimitiveTypeInfoValidationUtils.validateVarcharParameter(length); + return TypeInfoFactory.getVarcharTypeInfo(length); + } else { + PrimitiveTypeInfoValidationUtils.validateCharParameter(length); + return TypeInfoFactory.getCharTypeInfo(length); + } + } else if (params.length > 1) { + throw new IllegalArgumentException( + "Type " + typeEntry.typeName + " only takes one parameter, but " + + params.length + " is seen"); + } + + case DECIMAL: + int precision = serdeConstants.USER_DEFAULT_PRECISION; + int scale = serdeConstants.USER_DEFAULT_SCALE; + if (params == null || params.length == 0) { + // It's possible that old metadata still refers to "decimal" as a column type w/o + // precision/scale. In this case, the default (10,0) is assumed. Thus, do nothing here. + } else if (params.length == 1) { + // only precision is specified + precision = Integer.parseInt(params[0]); + PrimitiveTypeInfoValidationUtils.validateParameter(precision, scale); + } else if (params.length == 2) { + // New metadata always has two parameters. + precision = Integer.parseInt(params[0]); + scale = Integer.parseInt(params[1]); + PrimitiveTypeInfoValidationUtils.validateParameter(precision, scale); + } else if (params.length > 2) { + throw new IllegalArgumentException("Type decimal only takes two parameters, but " + + params.length + " is seen"); + } + return TypeInfoFactory.getDecimalTypeInfo(precision, scale); + + default: + return TypeInfoFactory.getPrimitiveTypeInfo(typeEntry.typeName); + } + } + + // Is this a list type? + if (serdeConstants.LIST_TYPE_NAME.equals(t.text)) { + expect("<"); + TypeInfo listElementType = parseType(); + expect(">"); + return TypeInfoFactory.getListTypeInfo(listElementType); + } + + // Is this a map type?
+ if (serdeConstants.MAP_TYPE_NAME.equals(t.text)) { + expect("<"); + TypeInfo mapKeyType = parseType(); + expect(","); + TypeInfo mapValueType = parseType(); + expect(">"); + return TypeInfoFactory.getMapTypeInfo(mapKeyType, mapValueType); + } + + // Is this a struct type? + if (serdeConstants.STRUCT_TYPE_NAME.equals(t.text)) { + ArrayList<String> fieldNames = new ArrayList<>(); + ArrayList<TypeInfo> fieldTypeInfos = new ArrayList<>(); + boolean first = true; + do { + if (first) { + expect("<"); + first = false; + } else { + Token separator = expect(">", ","); + if (separator.text.equals(">")) { + // end of struct + break; + } + } + Token name = expect("name",">"); + if (name.text.equals(">")) { + break; + } + fieldNames.add(name.text); + expect(":"); + fieldTypeInfos.add(parseType()); + } while (true); + + return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos); + } + // Is this a union type? + if (serdeConstants.UNION_TYPE_NAME.equals(t.text)) { + List<TypeInfo> objectTypeInfos = new ArrayList<>(); + boolean first = true; + do { + if (first) { + expect("<"); + first = false; + } else { + Token separator = expect(">", ","); + if (separator.text.equals(">")) { + // end of union + break; + } + } + objectTypeInfos.add(parseType()); + } while (true); + + return TypeInfoFactory.getUnionTypeInfo(objectTypeInfos); + } + + throw new RuntimeException("Internal error parsing position " + + t.position + " of '" + typeInfoString + "'"); + } + + public PrimitiveParts parsePrimitiveParts() { + PrimitiveParts parts = new PrimitiveParts(); + Token t = expect("type"); + parts.typeName = t.text; + parts.typeParams = parseParams(); + return parts; + } + + public static class PrimitiveParts { + public String typeName; + public String[] typeParams; + } +}
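A short usage sketch for the parser above, along the lines of its class javadoc (illustration, not part of the diff); TypeInfoParserSketch is a hypothetical harness and assumes the serde-api TypeInfoFactory added alongside this class is on the classpath:

    import java.util.ArrayList;

    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoParser;

    public class TypeInfoParserSketch {
      public static void main(String[] args) {
        // Top-level types are comma-separated; nested parameters are parsed recursively,
        // and decimal(10,2) goes through parseParams() and the validation utils.
        TypeInfoParser parser =
            new TypeInfoParser("int,map<string,array<bigint>>,decimal(10,2)");
        ArrayList<TypeInfo> typeInfos = parser.parseTypeInfos();
        for (TypeInfo typeInfo : typeInfos) {
          System.out.println(typeInfo.getTypeName());
        }
        // Expected output: int / map<string,array<bigint>> / decimal(10,2)
      }
    }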