diff --git pom.xml pom.xml
index 240472a30e..c7547decb0 100644
--- pom.xml
+++ pom.xml
@@ -192,7 +192,7 @@
2.0.0-M5
4.1.17.Final
3.10.5.Final
-    <parquet.version>1.10.0</parquet.version>
+    <parquet.version>1.11.0</parquet.version>
0.16.0
1.5.6
2.5.0
@@ -234,6 +234,12 @@
false
+
+    <repository>
+      <id>parquet</id>
+      <name>parquet staging repository</name>
+      <url>http://repository.apache.org/content/groups/staging</url>
+    </repository>
      <id>glassfish-repository</id>
      <url>http://maven.glassfish.org/content/groups/glassfish</url>
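For context on the hunks that follow: parquet-mr 1.11.0 deprecates the `OriginalType` enum in favor of `LogicalTypeAnnotation`, and the rest of this patch is a mostly mechanical migration between the two. A minimal before/after sketch of the idioms involved (the class and method names here are illustrative, not part of the patch):

```java
import org.apache.parquet.schema.LogicalTypeAnnotation.StringLogicalTypeAnnotation;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;

public class ApiMigrationSketch {
  // Pre-1.11 idiom (now deprecated): compare against the OriginalType enum.
  static boolean isStringOld(PrimitiveType type) {
    return OriginalType.UTF8 == type.getOriginalType();
  }

  // 1.11 idiom: inspect the LogicalTypeAnnotation attached to the type.
  static boolean isStringNew(PrimitiveType type) {
    return type.getLogicalTypeAnnotation() instanceof StringLogicalTypeAnnotation;
  }
}
```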
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
index 9010ac36cd..a2f275cafb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
@@ -16,6 +16,7 @@
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Map;
+import java.util.Optional;
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -41,7 +42,11 @@
import org.apache.parquet.column.Dictionary;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.PrimitiveConverter;
-import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.LogicalTypeAnnotationVisitor;
+import org.apache.parquet.schema.LogicalTypeAnnotation.StringLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.DateLogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;
/**
@@ -337,10 +342,7 @@ public void addInt(final int value) {
return new PrimitiveConverter() {
@Override
public void addInt(final int value) {
- if (value >= ((OriginalType.UINT_8 == type.getOriginalType() ||
- OriginalType.UINT_16 == type.getOriginalType() ||
- OriginalType.UINT_32 == type.getOriginalType() ||
- OriginalType.UINT_64 == type.getOriginalType()) ? 0 :
+ if (value >= ((ETypeConverter.isUnsignedInteger(type)) ? 0 :
Integer.MIN_VALUE)) {
parent.set(index, new IntWritable(value));
} else {
@@ -444,10 +446,7 @@ public void addLong(final long value) {
return new PrimitiveConverter() {
@Override
public void addLong(final long value) {
- if (value >= ((OriginalType.UINT_8 == type.getOriginalType() ||
- OriginalType.UINT_16 == type.getOriginalType() ||
- OriginalType.UINT_32 == type.getOriginalType() ||
- OriginalType.UINT_64 == type.getOriginalType()) ? 0 : Long.MIN_VALUE)) {
+ if (value >= ((ETypeConverter.isUnsignedInteger(type)) ? 0 : Long.MIN_VALUE)) {
parent.set(index, new LongWritable(value));
} else {
parent.set(index, null);
@@ -586,7 +585,9 @@ protected HiveDecimalWritable convert(Binary binary) {
return new BinaryConverter(type, parent, index) {
@Override
protected HiveDecimalWritable convert(Binary binary) {
- return new HiveDecimalWritable(binary.getBytes(), type.getDecimalMetadata().getScale());
+ DecimalLogicalTypeAnnotation logicalType =
+ (DecimalLogicalTypeAnnotation) type.getLogicalTypeAnnotation();
+ return new HiveDecimalWritable(binary.getBytes(), logicalType.getScale());
}
};
}
@@ -634,17 +635,33 @@ private ETypeConverter(final Class<?> type) {
abstract PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo);
public static PrimitiveConverter getNewConverter(final PrimitiveType type, final int index,
- final ConverterParent parent, TypeInfo hiveTypeInfo) {
+ final ConverterParent parent, final TypeInfo hiveTypeInfo) {
if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) {
//TODO- cleanup once parquet support Timestamp type annotation.
return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent, hiveTypeInfo);
}
- if (OriginalType.DECIMAL == type.getOriginalType()) {
- return EDECIMAL_CONVERTER.getConverter(type, index, parent, hiveTypeInfo);
- } else if (OriginalType.UTF8 == type.getOriginalType()) {
- return ESTRING_CONVERTER.getConverter(type, index, parent, hiveTypeInfo);
- } else if (OriginalType.DATE == type.getOriginalType()) {
- return EDATE_CONVERTER.getConverter(type, index, parent, hiveTypeInfo);
+ if (type.getLogicalTypeAnnotation() != null) {
+ Optional<PrimitiveConverter> converter = type.getLogicalTypeAnnotation()
+ .accept(new LogicalTypeAnnotationVisitor<PrimitiveConverter>() {
+ @Override
+ public Optional<PrimitiveConverter> visit(DecimalLogicalTypeAnnotation logicalTypeAnnotation) {
+ return Optional.of(EDECIMAL_CONVERTER.getConverter(type, index, parent, hiveTypeInfo));
+ }
+
+ @Override
+ public Optional<PrimitiveConverter> visit(StringLogicalTypeAnnotation logicalTypeAnnotation) {
+ return Optional.of(ESTRING_CONVERTER.getConverter(type, index, parent, hiveTypeInfo));
+ }
+
+ @Override
+ public Optional<PrimitiveConverter> visit(DateLogicalTypeAnnotation logicalTypeAnnotation) {
+ return Optional.of(EDATE_CONVERTER.getConverter(type, index, parent, hiveTypeInfo));
+ }
+ });
+
+ if (converter.isPresent()) {
+ return converter.get();
+ }
}
Class<?> javaType = type.getPrimitiveTypeName().javaType;
@@ -657,11 +674,22 @@ public static PrimitiveConverter getNewConverter(final PrimitiveType type, final
throw new IllegalArgumentException("Converter not found ... for type : " + type);
}
+ public static boolean isUnsignedInteger(final PrimitiveType type) {
+ if (type.getLogicalTypeAnnotation() != null) {
+ Optional<Boolean> isUnsignedInteger = type.getLogicalTypeAnnotation()
+ .accept(new LogicalTypeAnnotationVisitor<Boolean>() {
+ @Override public Optional<Boolean> visit(
+ LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
+ return Optional.of(!intLogicalType.isSigned());
+ }
+ });
+ if (isUnsignedInteger.isPresent()) return isUnsignedInteger.get();
+ }
+ return false;
+ }
+
private static long getMinValue(final PrimitiveType type, String typeName, long defaultValue) {
- if (OriginalType.UINT_8 == type.getOriginalType() ||
- OriginalType.UINT_16 == type.getOriginalType() ||
- OriginalType.UINT_32 == type.getOriginalType() ||
- OriginalType.UINT_64 == type.getOriginalType()) {
+ if (isUnsignedInteger(type)) {
return 0;
} else {
switch (typeName) {
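The new `isUnsignedInteger` helper above relies on a useful property of `LogicalTypeAnnotationVisitor`: every `visit` overload has a default implementation returning `Optional.empty()`, so a visitor only needs to override the annotations it cares about. A self-contained sketch of that idiom (class name invented for illustration):

```java
import java.util.Optional;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.LogicalTypeAnnotation.IntLogicalTypeAnnotation;
import org.apache.parquet.schema.LogicalTypeAnnotation.LogicalTypeAnnotationVisitor;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Types;

public class UnsignedCheckSketch {
  static boolean isUnsigned(PrimitiveType type) {
    LogicalTypeAnnotation annotation = type.getLogicalTypeAnnotation();
    if (annotation == null) {
      return false;
    }
    // Only the INT annotation is overridden; every other annotation falls
    // through to the default visit(...) and yields Optional.empty(),
    // i.e. "not an unsigned integer".
    return annotation.accept(new LogicalTypeAnnotationVisitor<Boolean>() {
      @Override
      public Optional<Boolean> visit(IntLogicalTypeAnnotation intType) {
        return Optional.of(!intType.isSigned());
      }
    }).orElse(false);
  }

  public static void main(String[] args) {
    PrimitiveType uint8 = Types.optional(PrimitiveTypeName.INT32)
        .as(LogicalTypeAnnotation.intType(8, false)).named("u");
    System.out.println(isUnsigned(uint8)); // prints: true
  }
}
```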
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
index c0124575ea..63a7d53e92 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
@@ -19,11 +19,15 @@
import org.apache.parquet.io.api.GroupConverter;
import org.apache.parquet.io.api.PrimitiveConverter;
import org.apache.parquet.schema.GroupType;
-import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.LogicalTypeAnnotation.ListLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.LogicalTypeAnnotationVisitor;
+import org.apache.parquet.schema.LogicalTypeAnnotation.MapKeyValueTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.MapLogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import java.util.Map;
+import java.util.Optional;
public abstract class HiveGroupConverter extends GroupConverter implements ConverterParent {
@@ -46,17 +50,34 @@ protected static PrimitiveConverter getConverterFromDescription(PrimitiveType ty
return ETypeConverter.getNewConverter(type, index, parent, hiveTypeInfo);
}
- protected static HiveGroupConverter getConverterFromDescription(GroupType type, int index, ConverterParent parent,
- TypeInfo hiveTypeInfo) {
+ protected static HiveGroupConverter getConverterFromDescription(final GroupType type,
+ final int index, final ConverterParent parent, final TypeInfo hiveTypeInfo) {
if (type == null) {
return null;
}
- OriginalType annotation = type.getOriginalType();
- if (annotation == OriginalType.LIST) {
- return HiveCollectionConverter.forList(type, parent, index, hiveTypeInfo);
- } else if (annotation == OriginalType.MAP || annotation == OriginalType.MAP_KEY_VALUE) {
- return HiveCollectionConverter.forMap(type, parent, index, hiveTypeInfo);
+ if (type.getLogicalTypeAnnotation() != null) {
+ Optional<HiveGroupConverter> converter =
+ type.getLogicalTypeAnnotation().accept(new LogicalTypeAnnotationVisitor<HiveGroupConverter>() {
+ @Override
+ public Optional<HiveGroupConverter> visit(ListLogicalTypeAnnotation logicalTypeAnnotation) {
+ return Optional.of(HiveCollectionConverter.forList(type, parent, index, hiveTypeInfo));
+ }
+
+ @Override
+ public Optional<HiveGroupConverter> visit(MapLogicalTypeAnnotation logicalTypeAnnotation) {
+ return Optional.of(HiveCollectionConverter.forMap(type, parent, index, hiveTypeInfo));
+ }
+
+ @Override
+ public Optional<HiveGroupConverter> visit(MapKeyValueTypeAnnotation logicalTypeAnnotation) {
+ return Optional.of(HiveCollectionConverter.forMap(type, parent, index, hiveTypeInfo));
+ }
+ });
+
+ if (converter.isPresent()) {
+ return converter.get();
+ }
}
return new HiveStructConverter(type, parent, index, hiveTypeInfo);
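Note that the visitor above routes both `MapLogicalTypeAnnotation` and the legacy `MapKeyValueTypeAnnotation` to `forMap(...)`, matching the old `MAP || MAP_KEY_VALUE` check. A small sketch of why the legacy branch is still needed: parsing a schema written with the old `MAP_KEY_VALUE` original type surfaces the legacy annotation under the new API (this assumes parquet-mr 1.11.0 parser behavior):

```java
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class LegacyMapSketch {
  public static void main(String[] args) {
    MessageType schema = MessageTypeParser.parseMessageType(
        "message m {\n"
        + "  optional group mapCol (MAP) {\n"
        + "    repeated group map (MAP_KEY_VALUE) {\n"
        + "      required binary key (UTF8);\n"
        + "      optional int32 value;\n"
        + "    }\n"
        + "  }\n"
        + "}\n");
    LogicalTypeAnnotation inner = schema.getType("mapCol")
        .asGroupType().getType("map").getLogicalTypeAnnotation();
    // Expected: true -- the legacy original type maps to the
    // deprecated-but-still-supported MapKeyValueTypeAnnotation.
    System.out.println(
        inner instanceof LogicalTypeAnnotation.MapKeyValueTypeAnnotation);
  }
}
```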
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
index 302321c7f8..21bfb2e1a2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
@@ -26,8 +26,8 @@
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.parquet.schema.ConversionPatterns;
import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
@@ -60,16 +60,16 @@ private static Type convertType(final String name, final TypeInfo typeInfo,
final Repetition repetition) {
if (typeInfo.getCategory().equals(Category.PRIMITIVE)) {
if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
- return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8)
- .named(name);
+ return Types.primitive(PrimitiveTypeName.BINARY, repetition)
+ .as(LogicalTypeAnnotation.stringType()).named(name);
} else if (typeInfo.equals(TypeInfoFactory.intTypeInfo)) {
return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name);
} else if (typeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
return Types.primitive(PrimitiveTypeName.INT32, repetition)
- .as(OriginalType.INT_16).named(name);
+ .as(LogicalTypeAnnotation.intType(16, true)).named(name);
} else if (typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
return Types.primitive(PrimitiveTypeName.INT32, repetition)
- .as(OriginalType.INT_8).named(name);
+ .as(LogicalTypeAnnotation.intType(8, true)).named(name);
} else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name);
} else if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
@@ -86,22 +86,22 @@ private static Type convertType(final String name, final TypeInfo typeInfo,
throw new UnsupportedOperationException("Void type not implemented");
} else if (typeInfo.getTypeName().toLowerCase().startsWith(
serdeConstants.CHAR_TYPE_NAME)) {
- return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8)
+ return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType())
.named(name);
} else if (typeInfo.getTypeName().toLowerCase().startsWith(
serdeConstants.VARCHAR_TYPE_NAME)) {
- return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8)
+ return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType())
.named(name);
} else if (typeInfo instanceof DecimalTypeInfo) {
DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
int prec = decimalTypeInfo.precision();
int scale = decimalTypeInfo.scale();
int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
- return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).
- scale(scale).precision(prec).named(name);
+ return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes)
+ .as(LogicalTypeAnnotation.decimalType(scale, prec)).named(name);
} else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
- return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named
- (name);
+ return Types.primitive(PrimitiveTypeName.INT32, repetition)
+ .as(LogicalTypeAnnotation.dateType()).named(name);
} else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
throw new UnsupportedOperationException("Unknown type not implemented");
} else {
@@ -122,19 +122,21 @@ private static Type convertType(final String name, final TypeInfo typeInfo,
// An optional group containing a repeated anonymous group "bag", containing
// 1 anonymous element "array_element"
- @SuppressWarnings("deprecation")
private static GroupType convertArrayType(final String name, final ListTypeInfo typeInfo) {
final TypeInfo subType = typeInfo.getListElementTypeInfo();
- return new GroupType(Repetition.OPTIONAL, name, OriginalType.LIST, new GroupType(Repetition.REPEATED,
- ParquetHiveSerDe.ARRAY.toString(), convertType("array_element", subType)));
+ GroupType groupType = Types.optionalGroup().as(LogicalTypeAnnotation.listType())
+ .addField(Types.repeatedGroup().addField(convertType("array_element", subType))
+ .named(ParquetHiveSerDe.ARRAY.toString()))
+ .named(name);
+ return groupType;
}
// An optional group containing multiple elements
private static GroupType convertStructType(final String name, final StructTypeInfo typeInfo) {
final List<String> columnNames = typeInfo.getAllStructFieldNames();
final List<TypeInfo> columnTypes = typeInfo.getAllStructFieldTypeInfos();
- return new GroupType(Repetition.OPTIONAL, name, convertTypes(columnNames, columnTypes));
-
+ GroupType groupType = Types.optionalGroup().addFields(convertTypes(columnNames, columnTypes)).named(name);
+ return groupType;
}
// An optional group containing a repeated anonymous group "map", containing
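The `convertArrayType` rewrite above swaps the deprecated `GroupType` constructor for the fluent `Types` builder. A standalone sketch of the same construction for `array<string>`, assuming Hive's usual `bag`/`array_element` wrapper names:

```java
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Types;

public class ListBuilderSketch {
  public static void main(String[] args) {
    GroupType arrayCol = Types.optionalGroup().as(LogicalTypeAnnotation.listType())
        .addField(Types.repeatedGroup()
            .addField(Types.optional(PrimitiveTypeName.BINARY)
                .as(LogicalTypeAnnotation.stringType()).named("array_element"))
            .named("bag"))
        .named("arrayCol");
    // Prints the Hive-style LIST structure:
    //   optional group arrayCol (LIST) { repeated group bag { ... } }
    System.out.println(arrayCol);
  }
}
```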
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
index 7f2a684d28..9c7fd57aec 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
@@ -43,8 +43,8 @@
import org.apache.parquet.hadoop.api.ReadSupport;
import org.apache.parquet.io.api.RecordMaterializer;
import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
@@ -157,8 +157,8 @@ private static Type getProjectedType(TypeInfo colType, Type fieldType) {
} else {
subFieldType = getProjectedType(elemType, subFieldType);
}
- return Types.buildGroup(Repetition.OPTIONAL).as(OriginalType.LIST).addFields(
- subFieldType).named(fieldType.getName());
+ return Types.buildGroup(Repetition.OPTIONAL).as(LogicalTypeAnnotation.listType())
+ .addFields(subFieldType).named(fieldType.getName());
}
}
break;
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java
index c1d71337d8..8584d6999d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java
@@ -21,6 +21,7 @@
import org.apache.hadoop.hive.common.type.HiveBaseChar;
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
import org.apache.hadoop.hive.serde.serdeConstants;
@@ -37,7 +38,10 @@
import org.apache.parquet.column.Dictionary;
import org.apache.parquet.column.values.ValuesReader;
import org.apache.parquet.io.api.Binary;
-import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.LogicalTypeAnnotationVisitor;
+import org.apache.parquet.schema.LogicalTypeAnnotation.StringLogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;
import java.io.IOException;
@@ -45,6 +49,7 @@
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
+import java.util.Optional;
/**
* Parquet file has self-describing schema which may differ from the user required schema (e.g.
@@ -1498,10 +1503,7 @@ private static ParquetDataColumnReader getDataColumnReaderByTypeHelper(boolean i
switch (parquetType.getPrimitiveTypeName()) {
case INT32:
- if (OriginalType.UINT_8 == parquetType.getOriginalType() ||
- OriginalType.UINT_16 == parquetType.getOriginalType() ||
- OriginalType.UINT_32 == parquetType.getOriginalType() ||
- OriginalType.UINT_64 == parquetType.getOriginalType()) {
+ if (ETypeConverter.isUnsignedInteger(parquetType)) {
return isDictionary ? new TypesFromUInt32PageReader(dictionary, length, hivePrecision,
hiveScale) : new TypesFromUInt32PageReader(valuesReader, length, hivePrecision,
hiveScale);
@@ -1511,10 +1513,7 @@ private static ParquetDataColumnReader getDataColumnReaderByTypeHelper(boolean i
hiveScale);
}
case INT64:
- if (OriginalType.UINT_8 == parquetType.getOriginalType() ||
- OriginalType.UINT_16 == parquetType.getOriginalType() ||
- OriginalType.UINT_32 == parquetType.getOriginalType() ||
- OriginalType.UINT_64 == parquetType.getOriginalType()) {
+ if (ETypeConverter.isUnsignedInteger(parquetType)) {
return isDictionary ? new TypesFromUInt64PageReader(dictionary, length, hivePrecision,
hiveScale) : new TypesFromUInt64PageReader(valuesReader, length, hivePrecision,
hiveScale);
@@ -1552,7 +1551,7 @@ private static ParquetDataColumnReader getConvertorFromBinary(boolean isDict,
TypeInfo hiveType,
ValuesReader valuesReader,
Dictionary dictionary) {
- OriginalType originalType = parquetType.getOriginalType();
+ LogicalTypeAnnotation logicalType = parquetType.getLogicalTypeAnnotation();
// max length for varchar and char cases
int length = getVarcharLength(hiveType);
@@ -1568,22 +1567,40 @@ private static ParquetDataColumnReader getConvertorFromBinary(boolean isDict,
int hiveScale = (typeName.equalsIgnoreCase(serdeConstants.DECIMAL_TYPE_NAME)) ?
((DecimalTypeInfo) realHiveType).getScale() : 0;
- if (originalType == null) {
- return isDict ? new DefaultParquetDataColumnReader(dictionary, length) : new
- DefaultParquetDataColumnReader(valuesReader, length);
- }
- switch (originalType) {
- case DECIMAL:
- final short scale = (short) parquetType.asPrimitiveType().getDecimalMetadata().getScale();
- return isDict ? new TypesFromDecimalPageReader(dictionary, length, scale, hivePrecision, hiveScale) : new
- TypesFromDecimalPageReader(valuesReader, length, scale, hivePrecision, hiveScale);
- case UTF8:
- return isDict ? new TypesFromStringPageReader(dictionary, length) : new
- TypesFromStringPageReader(valuesReader, length);
- default:
+ if (logicalType == null) {
return isDict ? new DefaultParquetDataColumnReader(dictionary, length) : new
DefaultParquetDataColumnReader(valuesReader, length);
}
+
+ Optional<ParquetDataColumnReader> reader =
+ parquetType.getLogicalTypeAnnotation()
+ .accept(new LogicalTypeAnnotationVisitor<ParquetDataColumnReader>() {
+ @Override
+ public Optional<ParquetDataColumnReader> visit(
+ DecimalLogicalTypeAnnotation logicalTypeAnnotation) {
+ final short scale = (short) logicalTypeAnnotation.getScale();
+ return isDict
+ ? Optional.of(
+ new TypesFromDecimalPageReader(dictionary, length, scale, hivePrecision, hiveScale))
+ : Optional.of(
+ new TypesFromDecimalPageReader(valuesReader, length, scale, hivePrecision, hiveScale));
+ }
+
+ @Override
+ public Optional<ParquetDataColumnReader> visit(
+ StringLogicalTypeAnnotation logicalTypeAnnotation) {
+ return isDict
+ ? Optional.of(new TypesFromStringPageReader(dictionary, length))
+ : Optional.of(new TypesFromStringPageReader(valuesReader, length));
+ }
+ });
+
+ if (reader.isPresent()) {
+ return reader.get();
+ }
+
+ return isDict ? new DefaultParquetDataColumnReader(dictionary, length) : new
+ DefaultParquetDataColumnReader(valuesReader, length);
}
public static ParquetDataColumnReader getDataColumnReaderByTypeOnDictionary(
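The decimal branch above now reads scale from `DecimalLogicalTypeAnnotation` instead of the deprecated `getDecimalMetadata()`. One easy trap, visible throughout this patch: the builder factory takes scale first, `decimalType(scale, precision)`. A small sketch (field name and byte length are illustrative):

```java
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Types;

public class DecimalAnnotationSketch {
  public static void main(String[] args) {
    // decimal(7,2) stored as a 4-byte fixed-length array, as Hive does.
    PrimitiveType dec = Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
        .length(4)
        .as(LogicalTypeAnnotation.decimalType(/* scale */ 2, /* precision */ 7))
        .named("d");
    DecimalLogicalTypeAnnotation ann =
        (DecimalLogicalTypeAnnotation) dec.getLogicalTypeAnnotation();
    System.out.println(ann.getPrecision() + "," + ann.getScale()); // prints: 7,2
  }
}
```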
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
index 3d61c33afd..bd519eb66e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
@@ -44,7 +44,9 @@
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.GroupType;
-import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.ListLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.MapLogicalTypeAnnotation;
import org.apache.parquet.schema.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -141,12 +143,12 @@ private DataWriter createWriter(ObjectInspector inspector, Type type) {
}
} else {
GroupType groupType = type.asGroupType();
- OriginalType originalType = type.getOriginalType();
+ LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
- if (originalType != null && originalType.equals(OriginalType.LIST)) {
+ if (logicalType != null && logicalType instanceof ListLogicalTypeAnnotation) {
checkInspectorCategory(inspector, ObjectInspector.Category.LIST);
return new ListDataWriter((ListObjectInspector)inspector, groupType);
- } else if (originalType != null && originalType.equals(OriginalType.MAP)) {
+ } else if (logicalType != null && logicalType instanceof MapLogicalTypeAnnotation) {
checkInspectorCategory(inspector, ObjectInspector.Category.MAP);
return new MapDataWriter((MapObjectInspector)inspector, groupType);
} else {
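On the writer side the patch uses plain `instanceof` checks rather than a visitor, which is reasonable for a two-way branch. A condensed sketch of the dispatch (helper name invented; note that `instanceof` is already false for `null`, so the explicit null checks in the hunk above are belt-and-braces):

```java
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.LogicalTypeAnnotation.ListLogicalTypeAnnotation;
import org.apache.parquet.schema.LogicalTypeAnnotation.MapLogicalTypeAnnotation;
import org.apache.parquet.schema.Type;

public class WriterDispatchSketch {
  static String writerKind(Type type) {
    LogicalTypeAnnotation annotation = type.getLogicalTypeAnnotation();
    if (annotation instanceof ListLogicalTypeAnnotation) {
      return "list";   // -> ListDataWriter in DataWritableWriter
    } else if (annotation instanceof MapLogicalTypeAnnotation) {
      return "map";    // -> MapDataWriter
    }
    return "struct";   // -> default struct writer path
  }
}
```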
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/HiveParquetSchemaTestUtils.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/HiveParquetSchemaTestUtils.java
index 17eca38111..c53d66ddc4 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/HiveParquetSchemaTestUtils.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/HiveParquetSchemaTestUtils.java
@@ -18,14 +18,18 @@
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
-import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.Type;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
public class HiveParquetSchemaTestUtils {
@@ -67,9 +71,37 @@ public static void testConversion(
List<Type> expectedFields = expectedMT.getFields();
List<Type> actualFields = messageTypeFound.getFields();
for (int i = 0, n = expectedFields.size(); i < n; ++i) {
- OriginalType exp = expectedFields.get(i).getOriginalType();
- OriginalType act = actualFields.get(i).getOriginalType();
- assertEquals("Original types of the field do not match", exp, act);
+
+ LogicalTypeAnnotation expectedLogicalType = expectedFields.get(i).getLogicalTypeAnnotation();
+ LogicalTypeAnnotation actualLogicalType = actualFields.get(i).getLogicalTypeAnnotation();
+ assertEquals("Logical type annotations of the field do not match", expectedLogicalType, actualLogicalType);
}
}
+
+ public static void testLogicalTypeAnnotation(String hiveColumnType, String hiveColumnName,
+ LogicalTypeAnnotation expectedLogicalType) throws Exception {
+ Map<String, LogicalTypeAnnotation> expectedLogicalTypeForColumn = new HashMap<>();
+ expectedLogicalTypeForColumn.put(hiveColumnName, expectedLogicalType);
+ testLogicalTypeAnnotations(hiveColumnName, hiveColumnType, expectedLogicalTypeForColumn);
+ }
+
+ public static void testLogicalTypeAnnotations(final String hiveColumnNames,
+ final String hiveColumnTypes, final Map<String, LogicalTypeAnnotation> expectedLogicalTypes)
+ throws Exception {
+ final List<String> columnNames = createHiveColumnsFrom(hiveColumnNames);
+ final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(hiveColumnTypes);
+ final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
+ List<Type> actualFields = messageTypeFound.getFields();
+ for (Type actualField : actualFields) {
+ LogicalTypeAnnotation expectedLogicalType = expectedLogicalTypes.get(actualField.getName());
+ LogicalTypeAnnotation actualLogicalType = actualField.getLogicalTypeAnnotation();
+ if (expectedLogicalType != null) {
+ assertNotNull("The logical type annotation cannot be null.", actualLogicalType);
+ assertEquals("Logical type annotations of the field do not match", expectedLogicalType, actualLogicalType);
+ } else {
+ assertNull("The logical type annotation must be null.", actualLogicalType);
+ }
+ }
+ }
}
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestArrayCompatibility.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestArrayCompatibility.java
index f2814f6943..aea0bf9492 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestArrayCompatibility.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestArrayCompatibility.java
@@ -28,10 +28,10 @@
import org.junit.Assert;
import org.junit.Test;
import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Types;
-import static org.apache.parquet.schema.OriginalType.LIST;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
@@ -123,7 +123,7 @@ public void write(RecordConsumer rc) {
public void testThriftPrimitiveInList() throws Exception {
Path test = writeDirect("ThriftPrimitiveInList",
Types.buildMessage()
- .requiredGroup().as(LIST)
+ .requiredGroup().as(LogicalTypeAnnotation.listType())
.repeated(INT32).named("list_of_ints_tuple")
.named("list_of_ints")
.named("ThriftPrimitiveInList"),
@@ -163,7 +163,7 @@ public void testThriftSingleFieldGroupInList() throws Exception {
Path test = writeDirect("ThriftSingleFieldGroupInList",
Types.buildMessage()
- .optionalGroup().as(LIST)
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.required(INT64).named("count")
.named("single_element_groups_tuple")
@@ -212,7 +212,7 @@ public void write(RecordConsumer rc) {
public void testAvroPrimitiveInList() throws Exception {
Path test = writeDirect("AvroPrimitiveInList",
Types.buildMessage()
- .requiredGroup().as(LIST)
+ .requiredGroup().as(LogicalTypeAnnotation.listType())
.repeated(INT32).named("array")
.named("list_of_ints")
.named("AvroPrimitiveInList"),
@@ -252,7 +252,7 @@ public void testAvroSingleFieldGroupInList() throws Exception {
Path test = writeDirect("AvroSingleFieldGroupInList",
Types.buildMessage()
- .optionalGroup().as(LIST)
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.required(INT64).named("count")
.named("array")
@@ -304,7 +304,7 @@ public void testAmbiguousSingleFieldGroupInList() throws Exception {
Path test = writeDirect("SingleFieldGroupInList",
Types.buildMessage()
- .optionalGroup().as(LIST)
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.required(INT64).named("count")
.named("single_element_group")
@@ -355,7 +355,7 @@ public void testMultiFieldGroupInList() throws Exception {
Path test = writeDirect("MultiFieldGroupInList",
Types.buildMessage()
- .optionalGroup().as(LIST)
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.required(DOUBLE).named("latitude")
.required(DOUBLE).named("longitude")
@@ -411,7 +411,7 @@ public void write(RecordConsumer rc) {
public void testNewOptionalGroupInList() throws Exception {
Path test = writeDirect("NewOptionalGroupInList",
Types.buildMessage()
- .optionalGroup().as(LIST)
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.optionalGroup()
.required(DOUBLE).named("latitude")
@@ -488,7 +488,7 @@ public void write(RecordConsumer rc) {
public void testNewRequiredGroupInList() throws Exception {
Path test = writeDirect("NewRequiredGroupInList",
Types.buildMessage()
- .optionalGroup().as(LIST)
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.requiredGroup()
.required(DOUBLE).named("latitude")
@@ -561,7 +561,7 @@ public void testHiveRequiredGroupInList() throws Exception {
// this matches the list structure that Hive writes
Path test = writeDirect("HiveRequiredGroupInList",
Types.buildMessage()
- .optionalGroup().as(LIST)
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.requiredGroup()
.required(DOUBLE).named("latitude")
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
index e1b2ba1021..dc80af1b76 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
@@ -18,12 +18,19 @@
import static org.apache.hadoop.hive.ql.io.parquet.HiveParquetSchemaTestUtils.testConversion;
import static org.junit.Assert.assertEquals;
+import static org.apache.hadoop.hive.ql.io.parquet.HiveParquetSchemaTestUtils.testLogicalTypeAnnotation;
+
+import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.MessageTypeParser;
+import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
import org.junit.Test;
@@ -33,13 +40,16 @@
@Test
public void testSimpleType() throws Exception {
testConversion(
- "a,b,c,d",
- "int,bigint,double,boolean",
+ "a,b,c,d,e,f,g",
+ "int,bigint,double,boolean,string,float,binary",
"message hive_schema {\n"
+ " optional int32 a;\n"
+ " optional int64 b;\n"
+ " optional double c;\n"
+ " optional boolean d;\n"
+ + " optional binary e (UTF8);\n"
+ + " optional float f;\n"
+ + " optional binary g;\n"
+ "}\n");
}
@@ -54,6 +64,17 @@ public void testSpecialIntType() throws Exception {
+ "}\n");
}
+ @Test
+ public void testSpecialIntTypeWithLogicalTypeAnnotations() throws Exception {
+ testConversion(
+ "a,b",
+ "tinyint,smallint",
+ "message hive_schema {\n"
+ + " optional int32 a (INTEGER(8,true));\n"
+ + " optional int32 b (INTEGER(16,true));\n"
+ + "}\n");
+ }
+
@Test
public void testDecimalType() throws Exception {
testConversion(
@@ -94,6 +115,16 @@ public void testDateType() throws Exception {
+ "}\n");
}
+ @Test
+ public void testTimestampType() throws Exception {
+ testConversion(
+ "a",
+ "timestamp",
+ "message hive_schema {\n"
+ + " optional int96 a;\n"
+ + "}\n");
+ }
+
@Test
public void testArray() throws Exception {
testConversion("arrayCol",
@@ -120,6 +151,99 @@ public void testArrayDecimal() throws Exception {
+ "}\n");
}
+ @Test
+ public void testArrayTinyInt() throws Exception {
+ testConversion("arrayCol",
+ "array",
+ "message hive_schema {\n"
+ + " optional group arrayCol (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional int32 array_element (INT_8);\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testArraySmallInt() throws Exception {
+ testConversion("arrayCol",
+ "array",
+ "message hive_schema {\n"
+ + " optional group arrayCol (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional int32 array_element (INT_16);\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testArrayString() throws Exception {
+ testConversion("arrayCol",
+ "array",
+ "message hive_schema {\n"
+ + " optional group arrayCol (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional binary array_element (UTF8);\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testArrayTimestamp() throws Exception {
+ testConversion("arrayCol",
+ "array",
+ "message hive_schema {\n"
+ + " optional group arrayCol (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional int96 array_element;\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testArrayStruct() throws Exception {
+ testConversion("structCol",
+ "array>",
+ "message hive_schema {\n"
+ + " optional group structCol (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional group array_element {\n"
+ + " optional binary a (UTF8);\n"
+ + " optional int32 b;\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testArrayInArray() throws Exception {
+ final List<String> columnNames = createHiveColumnsFrom("arrayCol");
+ ListTypeInfo listTypeInfo = new ListTypeInfo();
+ listTypeInfo.setListElementTypeInfo(TypeInfoUtils.getTypeInfosFromTypeString("int").get(0));
+ List<TypeInfo> typeInfos = new ArrayList<>();
+ ListTypeInfo listTypeInfo2 = new ListTypeInfo();
+ listTypeInfo2.setListElementTypeInfo(listTypeInfo);
+ typeInfos.add(listTypeInfo2);
+ final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, typeInfos);
+ final MessageType expectedMT = MessageTypeParser.parseMessageType(
+ "message hive_schema {\n"
+ + " optional group arrayCol (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional group array_element (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional int32 array_element;\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ assertEquals(expectedMT, messageTypeFound);
+ }
+
@Test
public void testStruct() throws Exception {
testConversion("structCol",
@@ -134,6 +258,61 @@ public void testStruct() throws Exception {
+ "}\n");
}
+ @Test
+ public void testStructInts() throws Exception {
+ testConversion("structCol",
+ "struct",
+ "message hive_schema {\n"
+ + " optional group structCol {\n"
+ + " optional int32 a (INT_8);\n"
+ + " optional int32 b (INT_16);\n"
+ + " optional int32 c;\n"
+ + " optional int64 d;\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testStructStrings() throws Exception {
+ testConversion("structCol",
+ "struct",
+ "message hive_schema {\n"
+ + " optional group structCol {\n"
+ + " optional binary a (UTF8);\n"
+ + " optional binary b (UTF8);\n"
+ + " optional binary c (UTF8);\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testStructTimestamp() throws Exception {
+ testConversion("structCol",
+ "struct",
+ "message hive_schema {\n"
+ + " optional group structCol {\n"
+ + " optional int96 a;\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testStructList() throws Exception {
+ testConversion("structCol",
+ "struct,b:int,c:string>",
+ "message hive_schema {\n"
+ + " optional group structCol {\n"
+ + " optional group a (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional binary array_element (UTF8);\n"
+ + " }\n"
+ + " }\n"
+ + " optional int32 b;\n"
+ + " optional binary c (UTF8);"
+ + " }\n"
+ + "}\n");
+ }
+
@Test
public void testMap() throws Exception {
testConversion("mapCol",
@@ -162,25 +341,131 @@ public void testMapDecimal() throws Exception {
+ "}\n");
}
+ @Test
+ public void testMapInts() throws Exception {
+ testConversion("mapCol",
+ "map",
+ "message hive_schema {\n"
+ + " optional group mapCol (MAP) {\n"
+ + " repeated group map (MAP_KEY_VALUE) {\n"
+ + " required int32 key (INT_16);\n"
+ + " optional int32 value (INT_8);\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testMapStruct() throws Exception {
+ testConversion("mapCol",
+ "map>",
+ "message hive_schema {\n"
+ + " optional group mapCol (MAP) {\n"
+ + " repeated group map (MAP_KEY_VALUE) {\n"
+ + " required binary key (UTF8);\n"
+ + " optional group value {\n"
+ + " optional int32 a (INT_16);\n"
+ + " optional int32 b;\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testMapList() throws Exception {
+ testConversion("mapCol",
+ "map>",
+ "message hive_schema {\n"
+ + " optional group mapCol (MAP) {\n"
+ + " repeated group map (MAP_KEY_VALUE) {\n"
+ + " required binary key (UTF8);\n"
+ + " optional group value (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional binary array_element (UTF8);\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testLogicalTypes() throws Exception {
+ testLogicalTypeAnnotation("string", "a", LogicalTypeAnnotation.stringType());
+ testLogicalTypeAnnotation("int", "a", null);
+ testLogicalTypeAnnotation("smallint", "a", LogicalTypeAnnotation.intType(16, true));
+ testLogicalTypeAnnotation("tinyint", "a", LogicalTypeAnnotation.intType(8, true));
+ testLogicalTypeAnnotation("bigint", "a", null);
+ testLogicalTypeAnnotation("double", "a", null);
+ testLogicalTypeAnnotation("float", "a", null);
+ testLogicalTypeAnnotation("boolean", "a", null);
+ testLogicalTypeAnnotation("binary", "a", null);
+ testLogicalTypeAnnotation("timestamp", "a", null);
+ testLogicalTypeAnnotation("char(3)", "a", LogicalTypeAnnotation.stringType());
+ testLogicalTypeAnnotation("varchar(30)", "a", LogicalTypeAnnotation.stringType());
+ testLogicalTypeAnnotation("decimal(7,2)", "a", LogicalTypeAnnotation.decimalType(2, 7));
+ }
+
@Test
public void testMapOriginalType() throws Exception {
- final String hiveColumnTypes = "map";
- final String hiveColumnNames = "mapCol";
- final List columnNames = createHiveColumnsFrom(hiveColumnNames);
- final List columnTypes = createHiveTypeInfoFrom(hiveColumnTypes);
- final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
+ final MessageType messageTypeFound = createSchema("map", "mapCol");
// this messageType only has one optional field, whose name is mapCol, original Type is MAP
assertEquals(1, messageTypeFound.getFieldCount());
- org.apache.parquet.schema.Type topLevel = messageTypeFound.getFields().get(0);
- assertEquals("mapCol",topLevel.getName());
- assertEquals(OriginalType.MAP, topLevel.getOriginalType());
- assertEquals(Repetition.OPTIONAL, topLevel.getRepetition());
+ Type topLevel = messageTypeFound.getFields().get(0);
+ checkField(topLevel, "mapCol", Repetition.OPTIONAL, LogicalTypeAnnotation.mapType());
assertEquals(1, topLevel.asGroupType().getFieldCount());
- org.apache.parquet.schema.Type secondLevel = topLevel.asGroupType().getFields().get(0);
- //there is one repeated field for mapCol, the field name is "map" and its original Type is MAP_KEY_VALUE;
- assertEquals("map", secondLevel.getName());
- assertEquals(OriginalType.MAP_KEY_VALUE, secondLevel.getOriginalType());
- assertEquals(Repetition.REPEATED, secondLevel.getRepetition());
+ Type secondLevel = topLevel.asGroupType().getFields().get(0);
+ // there is one repeated field for mapCol, the field name is "map" and its original Type is
+ // MAP_KEY_VALUE;
+ checkField(secondLevel, "map", Repetition.REPEATED, LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance());
+ }
+
+ @Test
+ public void testListOriginalType() throws Exception {
+
+ final MessageType messageTypeFound = createSchema("array", "arrayCol");
+
+ assertEquals(1, messageTypeFound.getFieldCount());
+ Type topLevel = messageTypeFound.getFields().get(0);
+ checkField(topLevel, "arrayCol", Repetition.OPTIONAL, LogicalTypeAnnotation.listType());
+
+ assertEquals(1, topLevel.asGroupType().getFieldCount());
+ Type secondLevel = topLevel.asGroupType().getFields().get(0);
+ checkField(secondLevel, "bag", Repetition.REPEATED, null);
+
+ assertEquals(1, secondLevel.asGroupType().getFieldCount());
+ Type thirdLevel = secondLevel.asGroupType().getFields().get(0);
+ checkField(thirdLevel, "array_element", Repetition.OPTIONAL, LogicalTypeAnnotation.intType(8, true));
+ }
+
+ @Test
+ public void testStructOriginalType() throws Exception {
+
+ final MessageType messageTypeFound = createSchema("struct", "structCol");
+
+ assertEquals(1, messageTypeFound.getFieldCount());
+ Type topLevel = messageTypeFound.getFields().get(0);
+ checkField(topLevel, "structCol", Repetition.OPTIONAL, null);
+
+ assertEquals(2, topLevel.asGroupType().getFieldCount());
+ Type a = topLevel.asGroupType().getFields().get(0);
+ checkField(a, "a", Repetition.OPTIONAL, LogicalTypeAnnotation.intType(16, true));
+ Type b = topLevel.asGroupType().getFields().get(1);
+ checkField(b, "b", Repetition.OPTIONAL, LogicalTypeAnnotation.stringType());
+ }
+
+ private MessageType createSchema(String hiveColumnTypes, String hiveColumnNames) {
+ List<String> columnNames = createHiveColumnsFrom(hiveColumnNames);
+ List<TypeInfo> columnTypes = createHiveTypeInfoFrom(hiveColumnTypes);
+ return HiveSchemaConverter.convert(columnNames, columnTypes);
+ }
+
+ private void checkField(Type field, String expectedName, Repetition expectedRepetition,
+ LogicalTypeAnnotation expectedLogicalType) {
+ assertEquals(expectedName, field.getName());
+ assertEquals(expectedLogicalType, field.getLogicalTypeAnnotation());
+ assertEquals(expectedRepetition, field.getRepetition());
}
}
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapStructures.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapStructures.java
index 7717f3c418..7de25ddb91 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapStructures.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapStructures.java
@@ -29,9 +29,9 @@
import org.junit.Test;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.Types;
-import static org.apache.parquet.schema.OriginalType.*;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.*;
public class TestMapStructures extends AbstractTestParquetDirect {
@@ -40,9 +40,9 @@
public void testStringMapRequiredPrimitive() throws Exception {
Path test = writeDirect("StringMapRequiredPrimitive",
Types.buildMessage()
- .optionalGroup().as(MAP)
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
- .required(BINARY).as(UTF8).named("key")
+ .required(BINARY).as(LogicalTypeAnnotation.stringType()).named("key")
.required(INT32).named("value")
.named("key_value")
.named("votes")
@@ -100,9 +100,9 @@ public void write(RecordConsumer rc) {
public void testStringMapOptionalPrimitive() throws Exception {
Path test = writeDirect("StringMapOptionalPrimitive",
Types.buildMessage()
- .optionalGroup().as(MAP)
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
- .required(BINARY).as(UTF8).named("key")
+ .required(BINARY).as(LogicalTypeAnnotation.stringType()).named("key")
.optional(INT32).named("value")
.named("key_value")
.named("votes")
@@ -170,12 +170,12 @@ public void testStringMapOfOptionalArray() throws Exception {
Path test = writeDirect("StringMapOfOptionalArray",
Types.buildMessage()
- .optionalGroup().as(MAP)
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
- .required(BINARY).as(UTF8).named("key")
- .optionalGroup().as(LIST)
+ .required(BINARY).as(LogicalTypeAnnotation.stringType()).named("key")
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
- .optional(BINARY).as(UTF8).named("element")
+ .optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("element")
.named("list")
.named("value")
.named("key_value")
@@ -250,10 +250,10 @@ public void testStringMapOfOptionalIntArray() throws Exception {
Path test = writeDirect("StringMapOfOptionalIntArray",
Types.buildMessage()
- .optionalGroup().as(MAP)
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
- .required(BINARY).as(UTF8).named("key")
- .optionalGroup().as(LIST)
+ .required(BINARY).as(LogicalTypeAnnotation.stringType()).named("key")
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.optional(INT32).named("element")
.named("list")
@@ -343,7 +343,7 @@ public void write(RecordConsumer rc) {
public void testMapWithComplexKey() throws Exception {
Path test = writeDirect("MapWithComplexKey",
Types.buildMessage()
- .optionalGroup().as(MAP)
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
.requiredGroup()
.required(INT32).named("x")
@@ -404,7 +404,7 @@ public void write(RecordConsumer rc) {
public void testDoubleMapWithStructValue() throws Exception {
Path test = writeDirect("DoubleMapWithStructValue",
Types.buildMessage()
- .optionalGroup().as(MAP)
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
.optional(DOUBLE).named("key")
.optionalGroup()
@@ -465,12 +465,12 @@ public void write(RecordConsumer rc) {
public void testNestedMap() throws Exception {
Path test = writeDirect("DoubleMapWithStructValue",
Types.buildMessage()
- .optionalGroup().as(MAP)
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
- .optional(BINARY).as(UTF8).named("key")
- .optionalGroup().as(MAP)
+ .optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("key")
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
- .optional(BINARY).as(UTF8).named("key")
+ .optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("key")
.required(INT32).named("value")
.named("key_value")
.named("value")
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/MyConverterParent.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/MyConverterParent.java
new file mode 100644
index 0000000000..3e3e4ff5ef
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/MyConverterParent.java
@@ -0,0 +1,29 @@
+package org.apache.hadoop.hive.ql.io.parquet.convert;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.io.Writable;
+
+public class MyConverterParent implements ConverterParent {
+
+ private Writable value;
+
+ public Writable getValue() {
+ return value;
+ }
+
+ @Override
+ public void set(int index, Writable value) {
+ this.value = value;
+ }
+
+ @Override
+ public Map<String, String> getMetadata() {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname, "false");
+ return metadata;
+ }
+
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
new file mode 100644
index 0000000000..1a84ad847d
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
@@ -0,0 +1,239 @@
+package org.apache.hadoop.hive.ql.io.parquet.convert;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter.BinaryConverter;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.PrimitiveConverter;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Type.Repetition;
+import org.apache.parquet.schema.Types;
+import org.junit.Test;
+
+public class TestETypeConverter {
+
+ @Test
+ public void testGetDecimalConverter() throws Exception {
+ TypeInfo hiveTypeInfo = new DecimalTypeInfo(7, 2);
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.BINARY)
+ .as(LogicalTypeAnnotation.decimalType(2, 7)).named("value");
+ Writable writable = getWritableFromBinaryConverter(hiveTypeInfo, primitiveType, Binary.fromString("155"));
+ HiveDecimalWritable decimalWritable = (HiveDecimalWritable) writable;
+ assertEquals(2, decimalWritable.getScale());
+ }
+
+ @Test
+ public void testGetTimestampConverter() throws Exception {
+ Timestamp timestamp = Timestamp.valueOf("2018-06-15 15:12:20.0");
+ NanoTime nanoTime = NanoTimeUtils.getNanoTime(timestamp, true);
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT96).named("value");
+ Writable writable = getWritableFromBinaryConverter(null, primitiveType, nanoTime.toBinary());
+ TimestampWritableV2 timestampWritable = (TimestampWritableV2) writable;
+ assertEquals(timestamp.getNanos(), timestampWritable.getNanos());
+ }
+
+ @Test
+ public void testGetTextConverter() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.BINARY)
+ .as(LogicalTypeAnnotation.stringType()).named("value");
+ Writable writable = getWritableFromBinaryConverter(new VarcharTypeInfo(), primitiveType,
+ Binary.fromString("this_is_a_value"));
+ Text textWritable = (Text) writable;
+ assertEquals("this_is_a_value", textWritable.toString());
+ }
+
+ @Test
+ public void testGetTextConverterNoHiveTypeInfo() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.BINARY)
+ .as(LogicalTypeAnnotation.stringType()).named("value");
+ Writable writable =
+ getWritableFromBinaryConverter(null, primitiveType, Binary.fromString("this_is_a_value"));
+ Text textWritable = (Text) writable;
+ assertEquals("this_is_a_value", textWritable.toString());
+ }
+
+ @Test
+ public void testGetIntConverterForTinyInt() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT32)
+ .as(LogicalTypeAnnotation.intType(8, false)).named("value");
+ Writable writable =
+ getWritableFromPrimitiveConverter(createHiveTypeInfo("tinyint"), primitiveType, 125);
+ IntWritable intWritable = (IntWritable) writable;
+ assertEquals(125, intWritable.get());
+ }
+
+ @Test
+ public void testGetIntConverterForFloat() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT32).named("value");
+ Writable writable = getWritableFromPrimitiveConverter(createHiveTypeInfo("float"), primitiveType, 22225);
+ FloatWritable floatWritable = (FloatWritable) writable;
+ assertEquals((float) 22225, (float) floatWritable.get(), 0);
+ }
+
+ @Test
+ public void testGetIntConverterForBigint() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT32).named("value");
+ Writable writable = getWritableFromPrimitiveConverter(createHiveTypeInfo("bigint"), primitiveType, 22225);
+ LongWritable longWritable = (LongWritable) writable;
+ assertEquals(22225, longWritable.get());
+ }
+
+ @Test
+ public void testGetIntConverterForDouble() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT32).named("value");
+ Writable writable = getWritableFromPrimitiveConverter(createHiveTypeInfo("double"), primitiveType, 22225);
+ DoubleWritable doubleWritable = (DoubleWritable) writable;
+ assertEquals((double) 22225, (double) doubleWritable.get(), 0);
+ }
+
+ @Test
+ public void testGetIntConverterForSmallint() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT32)
+ .as(LogicalTypeAnnotation.intType(16, false)).named("value");
+ Writable writable =
+ getWritableFromPrimitiveConverter(createHiveTypeInfo("smallint"), primitiveType, 32766);
+ IntWritable intWritable = (IntWritable) writable;
+ assertEquals(32766, intWritable.get());
+ }
+
+ @Test
+ public void testGetIntConverterNoHiveTypeInfo() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT32).named("value");
+ Writable writable = getWritableFromPrimitiveConverter(null, primitiveType, 12225);
+ IntWritable intWritable = (IntWritable) writable;
+ assertEquals(12225, intWritable.get());
+ }
+
+ @Test
+ public void testGetDoubleConverter() throws Exception {
+ MyConverterParent converterParent = new MyConverterParent();
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.DOUBLE).named("value");
+ PrimitiveConverter converter = ETypeConverter.getNewConverter(primitiveType, 1, converterParent, null);
+ ((PrimitiveConverter) converter).addDouble(3276);
+ Writable writable = converterParent.getValue();
+ DoubleWritable doubleWritable = (DoubleWritable) writable;
+ assertEquals(3276, doubleWritable.get(), 0);
+ }
+
+ @Test
+ public void testGetBooleanConverter() throws Exception {
+ MyConverterParent converterParent = new MyConverterParent();
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.BOOLEAN).named("value");
+ PrimitiveConverter converter = ETypeConverter.getNewConverter(primitiveType, 1, converterParent, null);
+ ((PrimitiveConverter) converter).addBoolean(true);
+ Writable writable = converterParent.getValue();
+ BooleanWritable booleanWritable = (BooleanWritable) writable;
+ assertEquals(true, booleanWritable.get());
+ }
+
+ @Test
+ public void testGetFloatConverter() throws Exception {
+ MyConverterParent converterParent = new MyConverterParent();
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.FLOAT).named("value");
+ PrimitiveConverter converter = ETypeConverter.getNewConverter(primitiveType, 1, converterParent, null);
+ ((PrimitiveConverter) converter).addFloat(3276f);
+ Writable writable = converterParent.getValue();
+ FloatWritable floatWritable = (FloatWritable) writable;
+ assertEquals(3276f, floatWritable.get(), 0);
+ }
+
+ @Test
+ public void testGetFloatConverterForDouble() throws Exception {
+ MyConverterParent converterParent = new MyConverterParent();
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.FLOAT).named("value");
+ PrimitiveConverter converter =
+ ETypeConverter.getNewConverter(primitiveType, 1, converterParent, createHiveTypeInfo("double"));
+ ((PrimitiveConverter) converter).addFloat(3276f);
+ Writable writable = converterParent.getValue();
+ DoubleWritable doubleWritable = (DoubleWritable) writable;
+ assertEquals(3276d, doubleWritable.get(), 0);
+ }
+
+ @Test
+ public void testGetBinaryConverter() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.BINARY).named("value");
+ Writable writable = getWritableFromBinaryConverter(null, primitiveType, Binary.fromString("this_is_a_value"));
+ BytesWritable byteWritable = (BytesWritable) writable;
+ assertEquals("this_is_a_value", new String(byteWritable.getBytes()));
+ }
+
+ @Test
+ public void testGetLongConverter() throws Exception {
+ MyConverterParent converterParent = new MyConverterParent();
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT64).named("value");
+ PrimitiveConverter converter = ETypeConverter.getNewConverter(primitiveType, 1, converterParent, null);
+ ((PrimitiveConverter) converter).addLong(12225);
+ Writable writable = converterParent.getValue();
+ LongWritable longWritable = (LongWritable) writable;
+ assertEquals(12225L, longWritable.get());
+ }
+
+ @Test
+ public void testGetConverterForList() {
+ MyConverterParent converterParent = new MyConverterParent();
+ GroupType type =
+ Types.optionalList().element(Types.optional(PrimitiveTypeName.INT64).named("value")).named("array");
+ HiveGroupConverter f = HiveGroupConverter.getConverterFromDescription(type, 1, converterParent, null);
+ assertTrue(f instanceof HiveCollectionConverter);
+ }
+
+ @Test
+ public void testGetConverterForMap() {
+ MyConverterParent converterParent = new MyConverterParent();
+ GroupType type = Types.optionalMap().key(Types.optional(PrimitiveTypeName.INT64).named("key"))
+ .value(Types.optional(PrimitiveTypeName.INT64).named("value")).named("map");
+    HiveGroupConverter converter = HiveGroupConverter.getConverterFromDescription(type, 1, converterParent, null);
+    assertTrue(converter instanceof HiveCollectionConverter);
+ }
+
+ @Test
+ public void testGetConverterForStruct() {
+ MyConverterParent converterParent = new MyConverterParent();
+ GroupType type = Types.buildGroup(Repetition.OPTIONAL).named("struct");
+    HiveGroupConverter converter = HiveGroupConverter.getConverterFromDescription(type, 1, converterParent, null);
+    assertTrue(converter instanceof HiveStructConverter);
+ }
+
+ private Writable getWritableFromBinaryConverter(TypeInfo hiveTypeInfo, PrimitiveType primitiveType,
+ Binary valueToAdd) {
+ MyConverterParent converterParent = new MyConverterParent();
+ PrimitiveConverter converter = ETypeConverter.getNewConverter(primitiveType, 1, converterParent, hiveTypeInfo);
+ ((BinaryConverter) converter).addBinary(valueToAdd);
+ return converterParent.getValue();
+ }
+
+ private Writable getWritableFromPrimitiveConverter(TypeInfo hiveTypeInfo, PrimitiveType primitiveType,
+ Integer valueToAdd) {
+ MyConverterParent converterParent = new MyConverterParent();
+ PrimitiveConverter converter = ETypeConverter.getNewConverter(primitiveType, 1, converterParent, hiveTypeInfo);
+    converter.addInt(valueToAdd);
+ return converterParent.getValue();
+ }
+
+ private PrimitiveTypeInfo createHiveTypeInfo(String typeName) {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName(typeName);
+ return hiveTypeInfo;
+ }
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestGetDataColumnReaderByType.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestGetDataColumnReaderByType.java
new file mode 100644
index 0000000000..54fe45ee6e
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestGetDataColumnReaderByType.java
@@ -0,0 +1,174 @@
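+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+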
+package org.apache.hadoop.hive.ql.io.parquet.convert;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.DefaultParquetDataColumnReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromBooleanPageReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromDecimalPageReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromDoublePageReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromFloatPageReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromInt32PageReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromInt64PageReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromInt96PageReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromStringPageReader;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Types;
+import org.junit.Test;
+
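+/**
+ * Tests that {@link ParquetDataColumnReaderFactory} returns the expected
+ * {@link ParquetDataColumnReader} implementation for each combination of
+ * Parquet primitive type, logical type annotation and Hive type.
+ */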
+public class TestGetDataColumnReaderByType {
+
+ @Test
+ public void testGetDecimalReader() throws Exception {
+ TypeInfo hiveTypeInfo = new DecimalTypeInfo(7, 2);
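+    // LogicalTypeAnnotation.decimalType takes (scale, precision): the Parquet column is decimal(5, 2)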
+ ParquetDataColumnReader reader =
+ ParquetDataColumnReaderFactory.getDataColumnReaderByType(
+ Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(20)
+ .as(LogicalTypeAnnotation.decimalType(2, 5)).named("value"),
+ hiveTypeInfo, null, true);
+ assertTrue(reader instanceof TypesFromDecimalPageReader);
+ }
+
+ @Test
+ public void testGetStringReader() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("string");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory.getDataColumnReaderByType(Types
+ .optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named("value"),
+ hiveTypeInfo, null, true);
+ assertTrue(reader instanceof TypesFromStringPageReader);
+ }
+
+ @Test
+ public void testGetDecimalReaderFromBinaryPrimitive() throws Exception {
+ TypeInfo hiveTypeInfo = new DecimalTypeInfo(7, 2);
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.BINARY)
+ .as(LogicalTypeAnnotation.decimalType(2, 5)).named("value"), hiveTypeInfo, null, true);
+ assertTrue(reader instanceof TypesFromDecimalPageReader);
+ }
+
+ @Test
+  public void testGetBinaryReaderNoLogicalType() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("string");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.BINARY).named("value"), hiveTypeInfo, null, true);
+ assertTrue(reader instanceof DefaultParquetDataColumnReader);
+ }
+
+ @Test
+  public void testGetBinaryReaderJsonLogicalType() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("binary");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory.getDataColumnReaderByType(Types
+ .optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.jsonType()).named("value"),
+ hiveTypeInfo, null, true);
+ assertTrue(reader instanceof DefaultParquetDataColumnReader);
+ }
+
+ @Test
+ public void testGetIntReader() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("int");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.INT32)
+ .as(LogicalTypeAnnotation.intType(32, false)).named("value"), hiveTypeInfo, null, true);
+ assertTrue(reader instanceof TypesFromInt32PageReader);
+ }
+
+ @Test
+  public void testGetIntReaderNoLogicalType() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("int");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.INT32).named("value"), hiveTypeInfo, null, true);
+ assertTrue(reader instanceof TypesFromInt32PageReader);
+ }
+
+ @Test
+  public void testGetInt64ReaderNoLogicalType() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("bigint");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory.getDataColumnReaderByType(
+ Types.optional(PrimitiveTypeName.INT64).named("value"), hiveTypeInfo, null, true);
+ assertTrue(reader instanceof TypesFromInt64PageReader);
+ }
+
+ @Test
+ public void testGetInt64Reader() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("bigint");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.INT64)
+ .as(LogicalTypeAnnotation.intType(64, false)).named("value"), hiveTypeInfo, null, true);
+ assertTrue(reader instanceof TypesFromInt64PageReader);
+ }
+
+ @Test
+ public void testGetFloatReader() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("float");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.FLOAT).named("value"), hiveTypeInfo, null, true);
+ assertTrue(reader instanceof TypesFromFloatPageReader);
+ }
+
+ @Test
+ public void testGetDoubleReader() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("double");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.DOUBLE).named("value"), hiveTypeInfo, null, true);
+ assertTrue(reader instanceof TypesFromDoublePageReader);
+ }
+
+ @Test
+ public void testGetInt96Reader() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("timestamp");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.INT96).named("value"), hiveTypeInfo, null, true);
+ assertTrue(reader instanceof TypesFromInt96PageReader);
+ }
+
+ @Test
+ public void testGetBooleanReader() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("boolean");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.BOOLEAN).named("value"), hiveTypeInfo, null, true);
+ assertTrue(reader instanceof TypesFromBooleanPageReader);
+ }
+}