diff --git pom.xml pom.xml
index ba87106..394e26b 100644
--- pom.xml
+++ pom.xml
@@ -195,7 +195,7 @@
2.0.0-M5
4.1.17.Final
3.10.5.Final
- <parquet.version>1.10.0</parquet.version>
+ <parquet.version>1.11.0</parquet.version>
0.16.0
1.5.6
2.5.0
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
index 89dfe2d..06a3ed7 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
@@ -16,6 +16,7 @@
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Map;
+import java.util.Optional;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.Timestamp;
@@ -43,7 +44,11 @@
import org.apache.parquet.column.Dictionary;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.PrimitiveConverter;
-import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.LogicalTypeAnnotationVisitor;
+import org.apache.parquet.schema.LogicalTypeAnnotation.StringLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.DateLogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;
/**
@@ -339,10 +344,7 @@ public void addInt(final int value) {
return new PrimitiveConverter() {
@Override
public void addInt(final int value) {
- if (value >= ((OriginalType.UINT_8 == type.getOriginalType() ||
- OriginalType.UINT_16 == type.getOriginalType() ||
- OriginalType.UINT_32 == type.getOriginalType() ||
- OriginalType.UINT_64 == type.getOriginalType()) ? 0 :
+ if (value >= ((ETypeConverter.isUnsignedInteger(type)) ? 0 :
Integer.MIN_VALUE)) {
parent.set(index, new IntWritable(value));
} else {
@@ -446,10 +448,7 @@ public void addLong(final long value) {
return new PrimitiveConverter() {
@Override
public void addLong(final long value) {
- if (value >= ((OriginalType.UINT_8 == type.getOriginalType() ||
- OriginalType.UINT_16 == type.getOriginalType() ||
- OriginalType.UINT_32 == type.getOriginalType() ||
- OriginalType.UINT_64 == type.getOriginalType()) ? 0 : Long.MIN_VALUE)) {
+ if (value >= ((ETypeConverter.isUnsignedInteger(type)) ? 0 : Long.MIN_VALUE)) {
parent.set(index, new LongWritable(value));
} else {
parent.set(index, null);
@@ -632,7 +631,9 @@ private void addDecimal(long value) {
return new BinaryConverter(type, parent, index) {
@Override
protected HiveDecimalWritable convert(Binary binary) {
- return new HiveDecimalWritable(binary.getBytes(), type.getDecimalMetadata().getScale());
+ DecimalLogicalTypeAnnotation logicalType =
+ (DecimalLogicalTypeAnnotation) type.getLogicalTypeAnnotation();
+ return new HiveDecimalWritable(binary.getBytes(), logicalType.getScale());
}
};
}
@@ -686,17 +687,33 @@ private ETypeConverter(final Class<?> type) {
abstract PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo);
public static PrimitiveConverter getNewConverter(final PrimitiveType type, final int index,
- final ConverterParent parent, TypeInfo hiveTypeInfo) {
+ final ConverterParent parent, final TypeInfo hiveTypeInfo) {
if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) {
//TODO- cleanup once parquet support Timestamp type annotation.
return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent, hiveTypeInfo);
}
- if (OriginalType.DECIMAL == type.getOriginalType()) {
- return EDECIMAL_CONVERTER.getConverter(type, index, parent, hiveTypeInfo);
- } else if (OriginalType.UTF8 == type.getOriginalType()) {
- return ESTRING_CONVERTER.getConverter(type, index, parent, hiveTypeInfo);
- } else if (OriginalType.DATE == type.getOriginalType()) {
- return EDATE_CONVERTER.getConverter(type, index, parent, hiveTypeInfo);
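+ // Dispatch on the new LogicalTypeAnnotation API: the visitor below picks a converter
+ // for DECIMAL, STRING and DATE annotations, mirroring the removed OriginalType checks.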
+ if (type.getLogicalTypeAnnotation() != null) {
+ Optional<PrimitiveConverter> converter = type.getLogicalTypeAnnotation()
+ .accept(new LogicalTypeAnnotationVisitor<PrimitiveConverter>() {
+ @Override
+ public Optional<PrimitiveConverter> visit(DecimalLogicalTypeAnnotation logicalTypeAnnotation) {
+ return Optional.of(EDECIMAL_CONVERTER.getConverter(type, index, parent, hiveTypeInfo));
+ }
+
+ @Override
+ public Optional<PrimitiveConverter> visit(StringLogicalTypeAnnotation logicalTypeAnnotation) {
+ return Optional.of(ESTRING_CONVERTER.getConverter(type, index, parent, hiveTypeInfo));
+ }
+
+ @Override
+ public Optional<PrimitiveConverter> visit(DateLogicalTypeAnnotation logicalTypeAnnotation) {
+ return Optional.of(EDATE_CONVERTER.getConverter(type, index, parent, hiveTypeInfo));
+ }
+ });
+
+ if (converter.isPresent()) {
+ return converter.get();
+ }
}
Class<?> javaType = type.getPrimitiveTypeName().javaType;
@@ -709,11 +726,24 @@ public static PrimitiveConverter getNewConverter(final PrimitiveType type, final
throw new IllegalArgumentException("Converter not found ... for type : " + type);
}
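+ /**
+ * Returns true when the Parquet type carries an unsigned integer annotation
+ * (an IntLogicalTypeAnnotation with isSigned() == false); false otherwise.
+ */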
+ public static boolean isUnsignedInteger(final PrimitiveType type) {
+ if (type.getLogicalTypeAnnotation() != null) {
+ Optional<Boolean> isUnsignedInteger = type.getLogicalTypeAnnotation()
+ .accept(new LogicalTypeAnnotationVisitor<Boolean>() {
+ @Override public Optional<Boolean> visit(
+ LogicalTypeAnnotation.IntLogicalTypeAnnotation intLogicalType) {
+ return Optional.of(!intLogicalType.isSigned());
+ }
+ });
+ if (isUnsignedInteger.isPresent()) {
+ return isUnsignedInteger.get();
+ }
+ }
+ return false;
+ }
+
private static long getMinValue(final PrimitiveType type, String typeName, long defaultValue) {
- if (OriginalType.UINT_8 == type.getOriginalType() ||
- OriginalType.UINT_16 == type.getOriginalType() ||
- OriginalType.UINT_32 == type.getOriginalType() ||
- OriginalType.UINT_64 == type.getOriginalType()) {
+ if (isUnsignedInteger(type)) {
return 0;
} else {
switch (typeName) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
index c012457..a13a549 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
@@ -19,11 +19,15 @@
import org.apache.parquet.io.api.GroupConverter;
import org.apache.parquet.io.api.PrimitiveConverter;
import org.apache.parquet.schema.GroupType;
-import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.LogicalTypeAnnotation.ListLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.LogicalTypeAnnotationVisitor;
+import org.apache.parquet.schema.LogicalTypeAnnotation.MapKeyValueTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.MapLogicalTypeAnnotation;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import java.util.Map;
+import java.util.Optional;
public abstract class HiveGroupConverter extends GroupConverter implements ConverterParent {
@@ -46,17 +50,34 @@ protected static PrimitiveConverter getConverterFromDescription(PrimitiveType ty
return ETypeConverter.getNewConverter(type, index, parent, hiveTypeInfo);
}
- protected static HiveGroupConverter getConverterFromDescription(GroupType type, int index, ConverterParent parent,
- TypeInfo hiveTypeInfo) {
+ protected static HiveGroupConverter getConverterFromDescription(final GroupType type,
+ final int index, final ConverterParent parent, final TypeInfo hiveTypeInfo) {
if (type == null) {
return null;
}
- OriginalType annotation = type.getOriginalType();
- if (annotation == OriginalType.LIST) {
- return HiveCollectionConverter.forList(type, parent, index, hiveTypeInfo);
- } else if (annotation == OriginalType.MAP || annotation == OriginalType.MAP_KEY_VALUE) {
- return HiveCollectionConverter.forMap(type, parent, index, hiveTypeInfo);
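+ // LIST annotations map to a list converter, MAP and the legacy MAP_KEY_VALUE annotations
+ // map to a map converter; anything else falls through to the struct converter below.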
+ if (type.getLogicalTypeAnnotation() != null) {
+ Optional<HiveGroupConverter> converter =
+ type.getLogicalTypeAnnotation().accept(new LogicalTypeAnnotationVisitor<HiveGroupConverter>() {
+ @Override
+ public Optional<HiveGroupConverter> visit(ListLogicalTypeAnnotation logicalTypeAnnotation) {
+ return Optional.of(HiveCollectionConverter.forList(type, parent, index, hiveTypeInfo));
+ }
+
+ @Override
+ public Optional<HiveGroupConverter> visit(MapLogicalTypeAnnotation logicalTypeAnnotation) {
+ return Optional.of(HiveCollectionConverter.forMap(type, parent, index, hiveTypeInfo));
+ }
+
+ @Override
+ public Optional<HiveGroupConverter> visit(MapKeyValueTypeAnnotation logicalTypeAnnotation) {
+ return Optional.of(HiveCollectionConverter.forMap(type, parent, index, hiveTypeInfo));
+ }
+ });
+
+ if (converter.isPresent()) {
+ return converter.get();
+ }
}
return new HiveStructConverter(type, parent, index, hiveTypeInfo);
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
index 302321c..21bfb2e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
@@ -26,8 +26,8 @@
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.parquet.schema.ConversionPatterns;
import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
@@ -60,16 +60,16 @@ private static Type convertType(final String name, final TypeInfo typeInfo,
final Repetition repetition) {
if (typeInfo.getCategory().equals(Category.PRIMITIVE)) {
if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
- return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8)
- .named(name);
+ return Types.primitive(PrimitiveTypeName.BINARY, repetition)
+ .as(LogicalTypeAnnotation.stringType()).named(name);
} else if (typeInfo.equals(TypeInfoFactory.intTypeInfo)) {
return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name);
} else if (typeInfo.equals(TypeInfoFactory.shortTypeInfo)) {
return Types.primitive(PrimitiveTypeName.INT32, repetition)
- .as(OriginalType.INT_16).named(name);
+ .as(LogicalTypeAnnotation.intType(16, true)).named(name);
} else if (typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
return Types.primitive(PrimitiveTypeName.INT32, repetition)
- .as(OriginalType.INT_8).named(name);
+ .as(LogicalTypeAnnotation.intType(8, true)).named(name);
} else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name);
} else if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
@@ -86,22 +86,22 @@ private static Type convertType(final String name, final TypeInfo typeInfo,
throw new UnsupportedOperationException("Void type not implemented");
} else if (typeInfo.getTypeName().toLowerCase().startsWith(
serdeConstants.CHAR_TYPE_NAME)) {
- return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8)
+ return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType())
.named(name);
} else if (typeInfo.getTypeName().toLowerCase().startsWith(
serdeConstants.VARCHAR_TYPE_NAME)) {
- return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8)
+ return Types.optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType())
.named(name);
} else if (typeInfo instanceof DecimalTypeInfo) {
DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
int prec = decimalTypeInfo.precision();
int scale = decimalTypeInfo.scale();
int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
- return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).
- scale(scale).precision(prec).named(name);
+ return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes)
+ .as(LogicalTypeAnnotation.decimalType(scale, prec)).named(name);
} else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
- return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named
- (name);
+ return Types.primitive(PrimitiveTypeName.INT32, repetition)
+ .as(LogicalTypeAnnotation.dateType()).named(name);
} else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
throw new UnsupportedOperationException("Unknown type not implemented");
} else {
@@ -122,19 +122,21 @@ private static Type convertType(final String name, final TypeInfo typeInfo,
// An optional group containing a repeated anonymous group "bag", containing
// 1 anonymous element "array_element"
- @SuppressWarnings("deprecation")
private static GroupType convertArrayType(final String name, final ListTypeInfo typeInfo) {
final TypeInfo subType = typeInfo.getListElementTypeInfo();
- return new GroupType(Repetition.OPTIONAL, name, OriginalType.LIST, new GroupType(Repetition.REPEATED,
- ParquetHiveSerDe.ARRAY.toString(), convertType("array_element", subType)));
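+ // Build the LIST-annotated group through the Types builder instead of the
+ // deprecated GroupType constructor that took an OriginalType.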
+ GroupType groupType = Types.optionalGroup().as(LogicalTypeAnnotation.listType())
+ .addField(Types.repeatedGroup().addField(convertType("array_element", subType))
+ .named(ParquetHiveSerDe.ARRAY.toString()))
+ .named(name);
+ return groupType;
}
// An optional group containing multiple elements
private static GroupType convertStructType(final String name, final StructTypeInfo typeInfo) {
final List<String> columnNames = typeInfo.getAllStructFieldNames();
final List<TypeInfo> columnTypes = typeInfo.getAllStructFieldTypeInfos();
- return new GroupType(Repetition.OPTIONAL, name, convertTypes(columnNames, columnTypes));
-
+ GroupType groupType = Types.optionalGroup().addFields(convertTypes(columnNames, columnTypes)).named(name);
+ return groupType;
}
// An optional group containing a repeated anonymous group "map", containing
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
index 30f3d17..d3245fc 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
@@ -46,8 +46,8 @@
import org.apache.parquet.hadoop.api.ReadSupport;
import org.apache.parquet.io.api.RecordMaterializer;
import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
@@ -160,8 +160,8 @@ private static Type getProjectedType(TypeInfo colType, Type fieldType) {
} else {
subFieldType = getProjectedType(elemType, subFieldType);
}
- return Types.buildGroup(Repetition.OPTIONAL).as(OriginalType.LIST).addFields(
- subFieldType).named(fieldType.getName());
+ return Types.buildGroup(Repetition.OPTIONAL).as(LogicalTypeAnnotation.listType())
+ .addFields(subFieldType).named(fieldType.getName());
}
}
break;
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java
index 979ef47..a20e28b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/ParquetDataColumnReaderFactory.java
@@ -22,6 +22,7 @@
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.Timestamp;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
+import org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
import org.apache.hadoop.hive.serde.serdeConstants;
@@ -38,6 +39,10 @@
import org.apache.parquet.column.Dictionary;
import org.apache.parquet.column.values.ValuesReader;
import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.LogicalTypeAnnotationVisitor;
+import org.apache.parquet.schema.LogicalTypeAnnotation.StringLogicalTypeAnnotation;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
@@ -47,6 +52,7 @@
import java.nio.charset.StandardCharsets;
import java.time.ZoneId;
import java.util.Arrays;
+import java.util.Optional;
/**
* Parquet file has self-describing schema which may differ from the user required schema (e.g.
@@ -1824,10 +1830,7 @@ private static ParquetDataColumnReader getDataColumnReaderByTypeHelper(boolean i
switch (parquetType.getPrimitiveTypeName()) {
case INT32:
- if (OriginalType.UINT_8 == parquetType.getOriginalType() ||
- OriginalType.UINT_16 == parquetType.getOriginalType() ||
- OriginalType.UINT_32 == parquetType.getOriginalType() ||
- OriginalType.UINT_64 == parquetType.getOriginalType()) {
+ if (ETypeConverter.isUnsignedInteger(parquetType)) {
return isDictionary ? new TypesFromUInt32PageReader(dictionary, length, hivePrecision,
hiveScale) : new TypesFromUInt32PageReader(valuesReader, length, hivePrecision,
hiveScale);
@@ -1841,10 +1844,7 @@ private static ParquetDataColumnReader getDataColumnReaderByTypeHelper(boolean i
hiveScale);
}
case INT64:
- if (OriginalType.UINT_8 == parquetType.getOriginalType() ||
- OriginalType.UINT_16 == parquetType.getOriginalType() ||
- OriginalType.UINT_32 == parquetType.getOriginalType() ||
- OriginalType.UINT_64 == parquetType.getOriginalType()) {
+ if (ETypeConverter.isUnsignedInteger(parquetType)) {
return isDictionary ? new TypesFromUInt64PageReader(dictionary, length, hivePrecision,
hiveScale) : new TypesFromUInt64PageReader(valuesReader, length, hivePrecision,
hiveScale);
@@ -1886,7 +1886,7 @@ private static ParquetDataColumnReader getConvertorFromBinary(boolean isDict,
TypeInfo hiveType,
ValuesReader valuesReader,
Dictionary dictionary) {
- OriginalType originalType = parquetType.getOriginalType();
+ LogicalTypeAnnotation logicalType = parquetType.getLogicalTypeAnnotation();
// max length for varchar and char cases
int length = getVarcharLength(hiveType);
@@ -1902,22 +1902,37 @@ private static ParquetDataColumnReader getConvertorFromBinary(boolean isDict,
int hiveScale = (typeName.equalsIgnoreCase(serdeConstants.DECIMAL_TYPE_NAME)) ?
((DecimalTypeInfo) realHiveType).getScale() : 0;
- if (originalType == null) {
- return isDict ? new DefaultParquetDataColumnReader(dictionary, length) : new
- DefaultParquetDataColumnReader(valuesReader, length);
- }
- switch (originalType) {
- case DECIMAL:
- final short scale = (short) parquetType.asPrimitiveType().getDecimalMetadata().getScale();
- return isDict ? new TypesFromDecimalPageReader(dictionary, length, scale, hivePrecision, hiveScale) : new
- TypesFromDecimalPageReader(valuesReader, length, scale, hivePrecision, hiveScale);
- case UTF8:
- return isDict ? new TypesFromStringPageReader(dictionary, length) : new
- TypesFromStringPageReader(valuesReader, length);
- default:
+ if (logicalType == null) {
return isDict ? new DefaultParquetDataColumnReader(dictionary, length) : new
DefaultParquetDataColumnReader(valuesReader, length);
}
+
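+ // Decimal and string annotations get dedicated page readers; any other annotation
+ // falls through to the default reader below.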
+ Optional<ParquetDataColumnReader> reader = parquetType.getLogicalTypeAnnotation()
+ .accept(new LogicalTypeAnnotationVisitor<ParquetDataColumnReader>() {
+ @Override public Optional<ParquetDataColumnReader> visit(
+ DecimalLogicalTypeAnnotation logicalTypeAnnotation) {
+ final short scale = (short) logicalTypeAnnotation.getScale();
+ return isDict ? Optional
+ .of(new TypesFromDecimalPageReader(dictionary, length, scale, hivePrecision,
+ hiveScale)) : Optional
+ .of(new TypesFromDecimalPageReader(valuesReader, length, scale, hivePrecision,
+ hiveScale));
+ }
+
+ @Override public Optional<ParquetDataColumnReader> visit(
+ StringLogicalTypeAnnotation logicalTypeAnnotation) {
+ return isDict ? Optional
+ .of(new TypesFromStringPageReader(dictionary, length)) : Optional
+ .of(new TypesFromStringPageReader(valuesReader, length));
+ }
+ });
+
+ if (reader.isPresent()) {
+ return reader.get();
+ }
+
+ return isDict ? new DefaultParquetDataColumnReader(dictionary, length) : new
+ DefaultParquetDataColumnReader(valuesReader, length);
}
public static ParquetDataColumnReader getDataColumnReaderByTypeOnDictionary(
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
index 3d61c33..bd519eb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
@@ -44,7 +44,9 @@
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.GroupType;
-import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.ListLogicalTypeAnnotation;
+import org.apache.parquet.schema.LogicalTypeAnnotation.MapLogicalTypeAnnotation;
import org.apache.parquet.schema.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -141,12 +143,12 @@ private DataWriter createWriter(ObjectInspector inspector, Type type) {
}
} else {
GroupType groupType = type.asGroupType();
- OriginalType originalType = type.getOriginalType();
+ LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
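+ // instanceof checks on the annotation object replace the old OriginalType.LIST / MAP comparisons.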
- if (originalType != null && originalType.equals(OriginalType.LIST)) {
+ if (logicalType != null && logicalType instanceof ListLogicalTypeAnnotation) {
checkInspectorCategory(inspector, ObjectInspector.Category.LIST);
return new ListDataWriter((ListObjectInspector)inspector, groupType);
- } else if (originalType != null && originalType.equals(OriginalType.MAP)) {
+ } else if (logicalType != null && logicalType instanceof MapLogicalTypeAnnotation) {
checkInspectorCategory(inspector, ObjectInspector.Category.MAP);
return new MapDataWriter((MapObjectInspector)inspector, groupType);
} else {
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/HiveParquetSchemaTestUtils.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/HiveParquetSchemaTestUtils.java
index 17eca38..181894f 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/HiveParquetSchemaTestUtils.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/HiveParquetSchemaTestUtils.java
@@ -18,14 +18,18 @@
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
-import org.apache.parquet.schema.OriginalType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.Type;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
public class HiveParquetSchemaTestUtils {
@@ -67,9 +71,37 @@ public static void testConversion(
List<Type> expectedFields = expectedMT.getFields();
List<Type> actualFields = messageTypeFound.getFields();
for (int i = 0, n = expectedFields.size(); i < n; ++i) {
- OriginalType exp = expectedFields.get(i).getOriginalType();
- OriginalType act = actualFields.get(i).getOriginalType();
- assertEquals("Original types of the field do not match", exp, act);
+
+ LogicalTypeAnnotation expectedLogicalType = expectedFields.get(i).getLogicalTypeAnnotation();
+ LogicalTypeAnnotation actualLogicalType = actualFields.get(i).getLogicalTypeAnnotation();
+ assertEquals("Logical type annotations of the field do not match", expectedLogicalType, actualLogicalType);
+ }
+ }
+
+ public static void testLogicalTypeAnnotation(String hiveColumnType, String hiveColumnName,
+ LogicalTypeAnnotation expectedLogicalType) throws Exception {
+ Map<String, LogicalTypeAnnotation> expectedLogicalTypeForColumn = new HashMap<>();
+ expectedLogicalTypeForColumn.put(hiveColumnName, expectedLogicalType);
+ testLogicalTypeAnnotations(hiveColumnName, hiveColumnType, expectedLogicalTypeForColumn);
+ }
+
+ public static void testLogicalTypeAnnotations(final String hiveColumnNames,
+ final String hiveColumnTypes, final Map<String, LogicalTypeAnnotation> expectedLogicalTypes)
+ throws Exception {
+ final List<String> columnNames = createHiveColumnsFrom(hiveColumnNames);
+ final List<TypeInfo> columnTypes = createHiveTypeInfoFrom(hiveColumnTypes);
+ final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
+ List<Type> actualFields = messageTypeFound.getFields();
+ for (Type actualField : actualFields) {
+ LogicalTypeAnnotation expectedLogicalType = expectedLogicalTypes.get(actualField.getName());
+ LogicalTypeAnnotation actualLogicalType = actualField.getLogicalTypeAnnotation();
+ if (expectedLogicalType != null) {
+ assertNotNull("The logical type annotation cannot be null.", actualLogicalType);
+ assertEquals("Logical type annotations of the field do not match", expectedLogicalType,
+ actualLogicalType);
+ } else {
+ assertNull("The logical type annotation must be null.", actualLogicalType);
+ }
}
}
}
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestArrayCompatibility.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestArrayCompatibility.java
index f2814f6..aea0bf9 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestArrayCompatibility.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestArrayCompatibility.java
@@ -28,10 +28,10 @@
import org.junit.Assert;
import org.junit.Test;
import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Types;
-import static org.apache.parquet.schema.OriginalType.LIST;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
@@ -123,7 +123,7 @@ public void write(RecordConsumer rc) {
public void testThriftPrimitiveInList() throws Exception {
Path test = writeDirect("ThriftPrimitiveInList",
Types.buildMessage()
- .requiredGroup().as(LIST)
+ .requiredGroup().as(LogicalTypeAnnotation.listType())
.repeated(INT32).named("list_of_ints_tuple")
.named("list_of_ints")
.named("ThriftPrimitiveInList"),
@@ -163,7 +163,7 @@ public void testThriftSingleFieldGroupInList() throws Exception {
Path test = writeDirect("ThriftSingleFieldGroupInList",
Types.buildMessage()
- .optionalGroup().as(LIST)
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.required(INT64).named("count")
.named("single_element_groups_tuple")
@@ -212,7 +212,7 @@ public void write(RecordConsumer rc) {
public void testAvroPrimitiveInList() throws Exception {
Path test = writeDirect("AvroPrimitiveInList",
Types.buildMessage()
- .requiredGroup().as(LIST)
+ .requiredGroup().as(LogicalTypeAnnotation.listType())
.repeated(INT32).named("array")
.named("list_of_ints")
.named("AvroPrimitiveInList"),
@@ -252,7 +252,7 @@ public void testAvroSingleFieldGroupInList() throws Exception {
Path test = writeDirect("AvroSingleFieldGroupInList",
Types.buildMessage()
- .optionalGroup().as(LIST)
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.required(INT64).named("count")
.named("array")
@@ -304,7 +304,7 @@ public void testAmbiguousSingleFieldGroupInList() throws Exception {
Path test = writeDirect("SingleFieldGroupInList",
Types.buildMessage()
- .optionalGroup().as(LIST)
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.required(INT64).named("count")
.named("single_element_group")
@@ -355,7 +355,7 @@ public void testMultiFieldGroupInList() throws Exception {
Path test = writeDirect("MultiFieldGroupInList",
Types.buildMessage()
- .optionalGroup().as(LIST)
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.required(DOUBLE).named("latitude")
.required(DOUBLE).named("longitude")
@@ -411,7 +411,7 @@ public void write(RecordConsumer rc) {
public void testNewOptionalGroupInList() throws Exception {
Path test = writeDirect("NewOptionalGroupInList",
Types.buildMessage()
- .optionalGroup().as(LIST)
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.optionalGroup()
.required(DOUBLE).named("latitude")
@@ -488,7 +488,7 @@ public void write(RecordConsumer rc) {
public void testNewRequiredGroupInList() throws Exception {
Path test = writeDirect("NewRequiredGroupInList",
Types.buildMessage()
- .optionalGroup().as(LIST)
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.requiredGroup()
.required(DOUBLE).named("latitude")
@@ -561,7 +561,7 @@ public void testHiveRequiredGroupInList() throws Exception {
// this matches the list structure that Hive writes
Path test = writeDirect("HiveRequiredGroupInList",
Types.buildMessage()
- .optionalGroup().as(LIST)
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.requiredGroup()
.required(DOUBLE).named("latitude")
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
index e1b2ba1..dc80af1 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestHiveSchemaConverter.java
@@ -18,12 +18,19 @@
import static org.apache.hadoop.hive.ql.io.parquet.HiveParquetSchemaTestUtils.testConversion;
import static org.junit.Assert.assertEquals;
+import static org.apache.hadoop.hive.ql.io.parquet.HiveParquetSchemaTestUtils.testLogicalTypeAnnotation;
+
+import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.OriginalType;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.MessageTypeParser;
+import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Type.Repetition;
import org.junit.Test;
@@ -33,13 +40,16 @@
@Test
public void testSimpleType() throws Exception {
testConversion(
- "a,b,c,d",
- "int,bigint,double,boolean",
+ "a,b,c,d,e,f,g",
+ "int,bigint,double,boolean,string,float,binary",
"message hive_schema {\n"
+ " optional int32 a;\n"
+ " optional int64 b;\n"
+ " optional double c;\n"
+ " optional boolean d;\n"
+ + " optional binary e (UTF8);\n"
+ + " optional float f;\n"
+ + " optional binary g;\n"
+ "}\n");
}
@@ -55,6 +65,17 @@ public void testSpecialIntType() throws Exception {
}
@Test
+ public void testSpecialIntTypeWithLogicalTypeAnnotations() throws Exception {
+ testConversion(
+ "a,b",
+ "tinyint,smallint",
+ "message hive_schema {\n"
+ + " optional int32 a (INTEGER(8,true));\n"
+ + " optional int32 b (INTEGER(16,true));\n"
+ + "}\n");
+ }
+
+ @Test
public void testDecimalType() throws Exception {
testConversion(
"a",
@@ -95,6 +116,16 @@ public void testDateType() throws Exception {
}
@Test
+ public void testTimestampType() throws Exception {
+ testConversion(
+ "a",
+ "timestamp",
+ "message hive_schema {\n"
+ + " optional int96 a;\n"
+ + "}\n");
+ }
+
+ @Test
public void testArray() throws Exception {
testConversion("arrayCol",
"array",
@@ -121,6 +152,99 @@ public void testArrayDecimal() throws Exception {
}
@Test
+ public void testArrayTinyInt() throws Exception {
+ testConversion("arrayCol",
+ "array",
+ "message hive_schema {\n"
+ + " optional group arrayCol (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional int32 array_element (INT_8);\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testArraySmallInt() throws Exception {
+ testConversion("arrayCol",
+ "array",
+ "message hive_schema {\n"
+ + " optional group arrayCol (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional int32 array_element (INT_16);\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testArrayString() throws Exception {
+ testConversion("arrayCol",
+ "array",
+ "message hive_schema {\n"
+ + " optional group arrayCol (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional binary array_element (UTF8);\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testArrayTimestamp() throws Exception {
+ testConversion("arrayCol",
+ "array",
+ "message hive_schema {\n"
+ + " optional group arrayCol (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional int96 array_element;\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testArrayStruct() throws Exception {
+ testConversion("structCol",
+ "array>",
+ "message hive_schema {\n"
+ + " optional group structCol (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional group array_element {\n"
+ + " optional binary a (UTF8);\n"
+ + " optional int32 b;\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testArrayInArray() throws Exception {
+ final List columnNames = createHiveColumnsFrom("arrayCol");
+ ListTypeInfo listTypeInfo = new ListTypeInfo();
+ listTypeInfo.setListElementTypeInfo(TypeInfoUtils.getTypeInfosFromTypeString("int").get(0));
+ List<TypeInfo> typeInfos = new ArrayList<>();
+ ListTypeInfo listTypeInfo2 = new ListTypeInfo();
+ listTypeInfo2.setListElementTypeInfo(listTypeInfo);
+ typeInfos.add(listTypeInfo2);
+ final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, typeInfos);
+ final MessageType expectedMT = MessageTypeParser.parseMessageType(
+ "message hive_schema {\n"
+ + " optional group arrayCol (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional group array_element (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional int32 array_element;\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ assertEquals(expectedMT, messageTypeFound);
+ }
+
+ @Test
public void testStruct() throws Exception {
testConversion("structCol",
"struct",
@@ -135,6 +259,61 @@ public void testStruct() throws Exception {
}
@Test
+ public void testStructInts() throws Exception {
+ testConversion("structCol",
+ "struct",
+ "message hive_schema {\n"
+ + " optional group structCol {\n"
+ + " optional int32 a (INT_8);\n"
+ + " optional int32 b (INT_16);\n"
+ + " optional int32 c;\n"
+ + " optional int64 d;\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testStructStrings() throws Exception {
+ testConversion("structCol",
+ "struct",
+ "message hive_schema {\n"
+ + " optional group structCol {\n"
+ + " optional binary a (UTF8);\n"
+ + " optional binary b (UTF8);\n"
+ + " optional binary c (UTF8);\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testStructTimestamp() throws Exception {
+ testConversion("structCol",
+ "struct",
+ "message hive_schema {\n"
+ + " optional group structCol {\n"
+ + " optional int96 a;\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testStructList() throws Exception {
+ testConversion("structCol",
+ "struct,b:int,c:string>",
+ "message hive_schema {\n"
+ + " optional group structCol {\n"
+ + " optional group a (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional binary array_element (UTF8);\n"
+ + " }\n"
+ + " }\n"
+ + " optional int32 b;\n"
+ + " optional binary c (UTF8);"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
public void testMap() throws Exception {
testConversion("mapCol",
"map",
@@ -163,24 +342,130 @@ public void testMapDecimal() throws Exception {
}
@Test
+ public void testMapInts() throws Exception {
+ testConversion("mapCol",
+ "map",
+ "message hive_schema {\n"
+ + " optional group mapCol (MAP) {\n"
+ + " repeated group map (MAP_KEY_VALUE) {\n"
+ + " required int32 key (INT_16);\n"
+ + " optional int32 value (INT_8);\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testMapStruct() throws Exception {
+ testConversion("mapCol",
+ "map>",
+ "message hive_schema {\n"
+ + " optional group mapCol (MAP) {\n"
+ + " repeated group map (MAP_KEY_VALUE) {\n"
+ + " required binary key (UTF8);\n"
+ + " optional group value {\n"
+ + " optional int32 a (INT_16);\n"
+ + " optional int32 b;\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testMapList() throws Exception {
+ testConversion("mapCol",
+ "map>",
+ "message hive_schema {\n"
+ + " optional group mapCol (MAP) {\n"
+ + " repeated group map (MAP_KEY_VALUE) {\n"
+ + " required binary key (UTF8);\n"
+ + " optional group value (LIST) {\n"
+ + " repeated group bag {\n"
+ + " optional binary array_element (UTF8);\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + " }\n"
+ + "}\n");
+ }
+
+ @Test
+ public void testLogicalTypes() throws Exception {
+ testLogicalTypeAnnotation("string", "a", LogicalTypeAnnotation.stringType());
+ testLogicalTypeAnnotation("int", "a", null);
+ testLogicalTypeAnnotation("smallint", "a", LogicalTypeAnnotation.intType(16, true));
+ testLogicalTypeAnnotation("tinyint", "a", LogicalTypeAnnotation.intType(8, true));
+ testLogicalTypeAnnotation("bigint", "a", null);
+ testLogicalTypeAnnotation("double", "a", null);
+ testLogicalTypeAnnotation("float", "a", null);
+ testLogicalTypeAnnotation("boolean", "a", null);
+ testLogicalTypeAnnotation("binary", "a", null);
+ testLogicalTypeAnnotation("timestamp", "a", null);
+ testLogicalTypeAnnotation("char(3)", "a", LogicalTypeAnnotation.stringType());
+ testLogicalTypeAnnotation("varchar(30)", "a", LogicalTypeAnnotation.stringType());
+ testLogicalTypeAnnotation("decimal(7,2)", "a", LogicalTypeAnnotation.decimalType(2, 7));
+ }
+
+ @Test
public void testMapOriginalType() throws Exception {
- final String hiveColumnTypes = "map";
- final String hiveColumnNames = "mapCol";
- final List columnNames = createHiveColumnsFrom(hiveColumnNames);
- final List columnTypes = createHiveTypeInfoFrom(hiveColumnTypes);
- final MessageType messageTypeFound = HiveSchemaConverter.convert(columnNames, columnTypes);
+ final MessageType messageTypeFound = createSchema("map", "mapCol");
// this messageType only has one optional field, whose name is mapCol, original Type is MAP
assertEquals(1, messageTypeFound.getFieldCount());
- org.apache.parquet.schema.Type topLevel = messageTypeFound.getFields().get(0);
- assertEquals("mapCol",topLevel.getName());
- assertEquals(OriginalType.MAP, topLevel.getOriginalType());
- assertEquals(Repetition.OPTIONAL, topLevel.getRepetition());
+ Type topLevel = messageTypeFound.getFields().get(0);
+ checkField(topLevel, "mapCol", Repetition.OPTIONAL, LogicalTypeAnnotation.mapType());
assertEquals(1, topLevel.asGroupType().getFieldCount());
- org.apache.parquet.schema.Type secondLevel = topLevel.asGroupType().getFields().get(0);
- //there is one repeated field for mapCol, the field name is "map" and its original Type is MAP_KEY_VALUE;
- assertEquals("map", secondLevel.getName());
- assertEquals(OriginalType.MAP_KEY_VALUE, secondLevel.getOriginalType());
- assertEquals(Repetition.REPEATED, secondLevel.getRepetition());
+ Type secondLevel = topLevel.asGroupType().getFields().get(0);
+ // there is one repeated field for mapCol, the field name is "map" and its original Type is
+ // MAP_KEY_VALUE;
+ checkField(secondLevel, "map", Repetition.REPEATED, LogicalTypeAnnotation.MapKeyValueTypeAnnotation.getInstance());
+ }
+
+ @Test
+ public void testListOriginalType() throws Exception {
+
+ final MessageType messageTypeFound = createSchema("array", "arrayCol");
+
+ assertEquals(1, messageTypeFound.getFieldCount());
+ Type topLevel = messageTypeFound.getFields().get(0);
+ checkField(topLevel, "arrayCol", Repetition.OPTIONAL, LogicalTypeAnnotation.listType());
+
+ assertEquals(1, topLevel.asGroupType().getFieldCount());
+ Type secondLevel = topLevel.asGroupType().getFields().get(0);
+ checkField(secondLevel, "bag", Repetition.REPEATED, null);
+
+ assertEquals(1, secondLevel.asGroupType().getFieldCount());
+ Type thirdLevel = secondLevel.asGroupType().getFields().get(0);
+ checkField(thirdLevel, "array_element", Repetition.OPTIONAL, LogicalTypeAnnotation.intType(8, true));
+ }
+
+ @Test
+ public void testStructOriginalType() throws Exception {
+
+ final MessageType messageTypeFound = createSchema("struct", "structCol");
+
+ assertEquals(1, messageTypeFound.getFieldCount());
+ Type topLevel = messageTypeFound.getFields().get(0);
+ checkField(topLevel, "structCol", Repetition.OPTIONAL, null);
+
+ assertEquals(2, topLevel.asGroupType().getFieldCount());
+ Type a = topLevel.asGroupType().getFields().get(0);
+ checkField(a, "a", Repetition.OPTIONAL, LogicalTypeAnnotation.intType(16, true));
+ Type b = topLevel.asGroupType().getFields().get(1);
+ checkField(b, "b", Repetition.OPTIONAL, LogicalTypeAnnotation.stringType());
+ }
+
+ private MessageType createSchema(String hiveColumnTypes, String hiveColumnNames) {
+ List<String> columnNames = createHiveColumnsFrom(hiveColumnNames);
+ List<TypeInfo> columnTypes = createHiveTypeInfoFrom(hiveColumnTypes);
+ return HiveSchemaConverter.convert(columnNames, columnTypes);
+ }
+
+ private void checkField(Type field, String expectedName, Repetition expectedRepetition,
+ LogicalTypeAnnotation expectedLogicalType) {
+ assertEquals(expectedName, field.getName());
+ assertEquals(expectedLogicalType, field.getLogicalTypeAnnotation());
+ assertEquals(expectedRepetition, field.getRepetition());
}
}
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapStructures.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapStructures.java
index 7717f3c..7de25dd 100644
--- ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapStructures.java
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestMapStructures.java
@@ -29,9 +29,9 @@
import org.junit.Test;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.RecordConsumer;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.Types;
-import static org.apache.parquet.schema.OriginalType.*;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.*;
public class TestMapStructures extends AbstractTestParquetDirect {
@@ -40,9 +40,9 @@
public void testStringMapRequiredPrimitive() throws Exception {
Path test = writeDirect("StringMapRequiredPrimitive",
Types.buildMessage()
- .optionalGroup().as(MAP)
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
- .required(BINARY).as(UTF8).named("key")
+ .required(BINARY).as(LogicalTypeAnnotation.stringType()).named("key")
.required(INT32).named("value")
.named("key_value")
.named("votes")
@@ -100,9 +100,9 @@ public void write(RecordConsumer rc) {
public void testStringMapOptionalPrimitive() throws Exception {
Path test = writeDirect("StringMapOptionalPrimitive",
Types.buildMessage()
- .optionalGroup().as(MAP)
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
- .required(BINARY).as(UTF8).named("key")
+ .required(BINARY).as(LogicalTypeAnnotation.stringType()).named("key")
.optional(INT32).named("value")
.named("key_value")
.named("votes")
@@ -170,12 +170,12 @@ public void testStringMapOfOptionalArray() throws Exception {
Path test = writeDirect("StringMapOfOptionalArray",
Types.buildMessage()
- .optionalGroup().as(MAP)
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
- .required(BINARY).as(UTF8).named("key")
- .optionalGroup().as(LIST)
+ .required(BINARY).as(LogicalTypeAnnotation.stringType()).named("key")
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
- .optional(BINARY).as(UTF8).named("element")
+ .optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("element")
.named("list")
.named("value")
.named("key_value")
@@ -250,10 +250,10 @@ public void testStringMapOfOptionalIntArray() throws Exception {
Path test = writeDirect("StringMapOfOptionalIntArray",
Types.buildMessage()
- .optionalGroup().as(MAP)
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
- .required(BINARY).as(UTF8).named("key")
- .optionalGroup().as(LIST)
+ .required(BINARY).as(LogicalTypeAnnotation.stringType()).named("key")
+ .optionalGroup().as(LogicalTypeAnnotation.listType())
.repeatedGroup()
.optional(INT32).named("element")
.named("list")
@@ -343,7 +343,7 @@ public void write(RecordConsumer rc) {
public void testMapWithComplexKey() throws Exception {
Path test = writeDirect("MapWithComplexKey",
Types.buildMessage()
- .optionalGroup().as(MAP)
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
.requiredGroup()
.required(INT32).named("x")
@@ -404,7 +404,7 @@ public void write(RecordConsumer rc) {
public void testDoubleMapWithStructValue() throws Exception {
Path test = writeDirect("DoubleMapWithStructValue",
Types.buildMessage()
- .optionalGroup().as(MAP)
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
.optional(DOUBLE).named("key")
.optionalGroup()
@@ -465,12 +465,12 @@ public void write(RecordConsumer rc) {
public void testNestedMap() throws Exception {
Path test = writeDirect("DoubleMapWithStructValue",
Types.buildMessage()
- .optionalGroup().as(MAP)
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
- .optional(BINARY).as(UTF8).named("key")
- .optionalGroup().as(MAP)
+ .optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("key")
+ .optionalGroup().as(LogicalTypeAnnotation.mapType())
.repeatedGroup()
- .optional(BINARY).as(UTF8).named("key")
+ .optional(BINARY).as(LogicalTypeAnnotation.stringType()).named("key")
.required(INT32).named("value")
.named("key_value")
.named("value")
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/MyConverterParent.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/MyConverterParent.java
new file mode 100644
index 0000000..2229b3a
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/MyConverterParent.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.parquet.convert;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Helper class for TestETypeConverter.
+ */
+public class MyConverterParent implements ConverterParent {
+
+ private Writable value;
+
+ public Writable getValue() {
+ return value;
+ }
+
+ @Override
+ public void set(int index, Writable value) {
+ this.value = value;
+ }
+
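+ /** Minimal metadata for the converters under test: timestamp conversion is not skipped. */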
+ @Override
+ public Map<String, String> getMetadata() {
+ Map<String, String> metadata = new HashMap<>();
+ metadata.put(HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname, "false");
+ return metadata;
+ }
+
+}
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
new file mode 100644
index 0000000..80f43f0
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestETypeConverter.java
@@ -0,0 +1,260 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.parquet.convert;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.io.parquet.convert.ETypeConverter.BinaryConverter;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritableV2;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
+import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.DoubleWritable;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.io.api.PrimitiveConverter;
+import org.apache.parquet.schema.GroupType;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.PrimitiveType;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Type.Repetition;
+import org.apache.parquet.schema.Types;
+import org.junit.Test;
+
+/**
+ * Tests for class ETypeConverter.
+ */
+public class TestETypeConverter {
+
+ @Test
+ public void testGetDecimalConverter() throws Exception {
+ TypeInfo hiveTypeInfo = new DecimalTypeInfo(7, 2);
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.BINARY)
+ .as(LogicalTypeAnnotation.decimalType(2, 7)).named("value");
+ Writable writable = getWritableFromBinaryConverter(hiveTypeInfo, primitiveType, Binary.fromString("155"));
+ HiveDecimalWritable decimalWritable = (HiveDecimalWritable) writable;
+ assertEquals(2, decimalWritable.getScale());
+ }
+
+ @Test
+ public void testGetTimestampConverter() throws Exception {
+ Timestamp timestamp = Timestamp.valueOf("2018-06-15 15:12:20.0");
+ NanoTime nanoTime = NanoTimeUtils.getNanoTime(timestamp, true);
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT96).named("value");
+ Writable writable = getWritableFromBinaryConverter(null, primitiveType, nanoTime.toBinary());
+ TimestampWritableV2 timestampWritable = (TimestampWritableV2) writable;
+ assertEquals(timestamp.getNanos(), timestampWritable.getNanos());
+ }
+
+ @Test
+ public void testGetTextConverter() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.BINARY)
+ .as(LogicalTypeAnnotation.stringType()).named("value");
+ Writable writable = getWritableFromBinaryConverter(new VarcharTypeInfo(), primitiveType,
+ Binary.fromString("this_is_a_value"));
+ Text textWritable = (Text) writable;
+ assertEquals("this_is_a_value", textWritable.toString());
+ }
+
+ @Test
+ public void testGetTextConverterNoHiveTypeInfo() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.BINARY)
+ .as(LogicalTypeAnnotation.stringType()).named("value");
+ Writable writable =
+ getWritableFromBinaryConverter(null, primitiveType, Binary.fromString("this_is_a_value"));
+ Text textWritable = (Text) writable;
+ assertEquals("this_is_a_value", textWritable.toString());
+ }
+
+ @Test
+ public void testGetIntConverterForTinyInt() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT32)
+ .as(LogicalTypeAnnotation.intType(8, false)).named("value");
+ Writable writable =
+ getWritableFromPrimitiveConverter(createHiveTypeInfo("tinyint"), primitiveType, 125);
+ IntWritable intWritable = (IntWritable) writable;
+ assertEquals(125, intWritable.get());
+ }
+
+ @Test
+ public void testGetIntConverterForFloat() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT32).named("value");
+ Writable writable = getWritableFromPrimitiveConverter(createHiveTypeInfo("float"), primitiveType, 22225);
+ FloatWritable floatWritable = (FloatWritable) writable;
+ assertEquals((float) 22225, (float) floatWritable.get(), 0);
+ }
+
+ @Test
+ public void testGetIntConverterForBigint() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT32).named("value");
+ Writable writable = getWritableFromPrimitiveConverter(createHiveTypeInfo("bigint"), primitiveType, 22225);
+ LongWritable longWritable = (LongWritable) writable;
+ assertEquals(22225, longWritable.get());
+ }
+
+ @Test
+ public void testGetIntConverterForDouble() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT32).named("value");
+ Writable writable = getWritableFromPrimitiveConverter(createHiveTypeInfo("double"), primitiveType, 22225);
+ DoubleWritable doubleWritable = (DoubleWritable) writable;
+ assertEquals((double) 22225, (double) doubleWritable.get(), 0);
+ }
+
+ @Test
+ public void testGetIntConverterForSmallint() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT32)
+ .as(LogicalTypeAnnotation.intType(16, false)).named("value");
+ Writable writable =
+ getWritableFromPrimitiveConverter(createHiveTypeInfo("smallint"), primitiveType, 32766);
+ IntWritable intWritable = (IntWritable) writable;
+ assertEquals(32766, intWritable.get());
+ }
+
+ @Test
+ public void testGetIntConverterNoHiveTypeInfo() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT32).named("value");
+ Writable writable = getWritableFromPrimitiveConverter(null, primitiveType, 12225);
+ IntWritable intWritable = (IntWritable) writable;
+ assertEquals(12225, intWritable.get());
+ }
+
+ @Test
+ public void testGetDoubleConverter() throws Exception {
+ MyConverterParent converterParent = new MyConverterParent();
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.DOUBLE).named("value");
+ PrimitiveConverter converter = ETypeConverter.getNewConverter(primitiveType, 1, converterParent, null);
+ ((PrimitiveConverter) converter).addDouble(3276);
+ Writable writable = converterParent.getValue();
+ DoubleWritable doubleWritable = (DoubleWritable) writable;
+ assertEquals(3276, doubleWritable.get(), 0);
+ }
+
+ @Test
+ public void testGetBooleanConverter() throws Exception {
+ MyConverterParent converterParent = new MyConverterParent();
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.BOOLEAN).named("value");
+ PrimitiveConverter converter = ETypeConverter.getNewConverter(primitiveType, 1, converterParent, null);
+ ((PrimitiveConverter) converter).addBoolean(true);
+ Writable writable = converterParent.getValue();
+ BooleanWritable booleanWritable = (BooleanWritable) writable;
+ assertEquals(true, booleanWritable.get());
+ }
+
+ @Test
+ public void testGetFloatConverter() throws Exception {
+ MyConverterParent converterParent = new MyConverterParent();
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.FLOAT).named("value");
+ PrimitiveConverter converter = ETypeConverter.getNewConverter(primitiveType, 1, converterParent, null);
+ ((PrimitiveConverter) converter).addFloat(3276f);
+ Writable writable = converterParent.getValue();
+ FloatWritable floatWritable = (FloatWritable) writable;
+ assertEquals(3276f, floatWritable.get(), 0);
+ }
+
+ @Test
+ public void testGetFloatConverterForDouble() throws Exception {
+ MyConverterParent converterParent = new MyConverterParent();
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.FLOAT).named("value");
+ PrimitiveConverter converter =
+ ETypeConverter.getNewConverter(primitiveType, 1, converterParent, createHiveTypeInfo("double"));
+ ((PrimitiveConverter) converter).addFloat(3276f);
+ Writable writable = converterParent.getValue();
+ DoubleWritable doubleWritable = (DoubleWritable) writable;
+ assertEquals(3276d, doubleWritable.get(), 0);
+ }
+
+ @Test
+ public void testGetBinaryConverter() throws Exception {
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.BINARY).named("value");
+ Writable writable = getWritableFromBinaryConverter(null, primitiveType, Binary.fromString("this_is_a_value"));
+ BytesWritable byteWritable = (BytesWritable) writable;
+ assertEquals("this_is_a_value", new String(byteWritable.getBytes()));
+ }
+
+ @Test
+ public void testGetLongConverter() throws Exception {
+ MyConverterParent converterParent = new MyConverterParent();
+ PrimitiveType primitiveType = Types.optional(PrimitiveTypeName.INT64).named("value");
+ PrimitiveConverter converter = ETypeConverter.getNewConverter(primitiveType, 1, converterParent, null);
+ converter.addLong(12225L);
+ Writable writable = converterParent.getValue();
+ LongWritable longWritable = (LongWritable) writable;
+ assertEquals(12225L, longWritable.get());
+ }
+
+ @Test
+ public void testGetConverterForList() {
+ MyConverterParent converterParent = new MyConverterParent();
+ GroupType type =
+ Types.optionalList().element(Types.optional(PrimitiveTypeName.INT64).named("value")).named("array");
+ HiveGroupConverter f = HiveGroupConverter.getConverterFromDescription(type, 1, converterParent, null);
+ assertTrue(f instanceof HiveCollectionConverter);
+ }
+
+ @Test
+ public void testGetConverterForMap() {
+ MyConverterParent converterParent = new MyConverterParent();
+ GroupType type = Types.optionalMap().key(Types.optional(PrimitiveTypeName.INT64).named("key"))
+ .value(Types.optional(PrimitiveTypeName.INT64).named("value")).named("map");
+ HiveGroupConverter f = HiveGroupConverter.getConverterFromDescription(type, 1, converterParent, null);
+ assertTrue(f instanceof HiveCollectionConverter);
+ }
+
+ @Test
+ public void testGetConverterForStruct() {
+ MyConverterParent converterParent = new MyConverterParent();
+ GroupType type = Types.buildGroup(Repetition.OPTIONAL).named("struct");
+ HiveGroupConverter f = HiveGroupConverter.getConverterFromDescription(type, 1, converterParent, null);
+ assertTrue(f instanceof HiveStructConverter);
+ }
+
+ private Writable getWritableFromBinaryConverter(TypeInfo hiveTypeInfo, PrimitiveType primitiveType,
+ Binary valueToAdd) {
+ MyConverterParent converterParent = new MyConverterParent();
+ PrimitiveConverter converter = ETypeConverter.getNewConverter(primitiveType, 1, converterParent, hiveTypeInfo);
+ ((BinaryConverter) converter).addBinary(valueToAdd);
+ return converterParent.getValue();
+ }
+
+ private Writable getWritableFromPrimitiveConverter(TypeInfo hiveTypeInfo, PrimitiveType primitiveType,
+ Integer valueToAdd) {
+ MyConverterParent converterParent = new MyConverterParent();
+ PrimitiveConverter converter = ETypeConverter.getNewConverter(primitiveType, 1, converterParent, hiveTypeInfo);
+ converter.addInt(valueToAdd);
+ return converterParent.getValue();
+ }
+
+ private PrimitiveTypeInfo createHiveTypeInfo(String typeName) {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName(typeName);
+ return hiveTypeInfo;
+ }
+}
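The Types builder calls in the tests above attach a LogicalTypeAnnotation to the Parquet primitive type, and that annotation can be read back from the type the builder returns. A minimal sketch of that round trip against the public parquet-mr schema API (the class name is illustrative):

    import org.apache.parquet.schema.LogicalTypeAnnotation;
    import org.apache.parquet.schema.LogicalTypeAnnotation.IntLogicalTypeAnnotation;
    import org.apache.parquet.schema.PrimitiveType;
    import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
    import org.apache.parquet.schema.Types;

    public class LogicalTypeAnnotationSketch {
      public static void main(String[] args) {
        // INT32 column annotated as a 16-bit unsigned integer, built the same
        // way as in testGetIntConverterForSmallint above.
        PrimitiveType smallintCol = Types.optional(PrimitiveTypeName.INT32)
            .as(LogicalTypeAnnotation.intType(16, false)).named("value");

        // The annotation travels with the type and can be inspected later.
        IntLogicalTypeAnnotation annotation =
            (IntLogicalTypeAnnotation) smallintCol.getLogicalTypeAnnotation();
        System.out.println(annotation.getBitWidth()); // 16
        System.out.println(annotation.isSigned());    // false
      }
    }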
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestGetDataColumnReaderByType.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestGetDataColumnReaderByType.java
new file mode 100644
index 0000000..bfacf2e
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/TestGetDataColumnReaderByType.java
@@ -0,0 +1,178 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io.parquet.convert;
+
+import static org.junit.Assert.assertTrue;
+
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.DefaultParquetDataColumnReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromBooleanPageReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromDecimalPageReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromDoublePageReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromFloatPageReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromInt32PageReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromInt64PageReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromInt96PageReader;
+import org.apache.hadoop.hive.ql.io.parquet.vector.ParquetDataColumnReaderFactory.TypesFromStringPageReader;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.parquet.schema.LogicalTypeAnnotation;
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
+import org.apache.parquet.schema.Types;
+import org.junit.Test;
+
+/**
+ * Tests for ParquetDataColumnReaderFactory#getDataColumnReaderByType.
+ */
+public class TestGetDataColumnReaderByType {
+
+ @Test
+ public void testGetDecimalReader() throws Exception {
+ TypeInfo hiveTypeInfo = new DecimalTypeInfo(7, 2);
+ ParquetDataColumnReader reader =
+ ParquetDataColumnReaderFactory.getDataColumnReaderByType(
+ Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(20)
+ .as(LogicalTypeAnnotation.decimalType(2, 5)).named("value"),
+ hiveTypeInfo, null, true, null);
+ assertTrue(reader instanceof TypesFromDecimalPageReader);
+ }
+
+ @Test
+ public void testGetStringReader() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("string");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory.getDataColumnReaderByType(Types
+ .optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.stringType()).named("value"),
+ hiveTypeInfo, null, true, null);
+ assertTrue(reader instanceof TypesFromStringPageReader);
+ }
+
+ @Test
+ public void testGetDecimalReaderFromBinaryPrimitive() throws Exception {
+ TypeInfo hiveTypeInfo = new DecimalTypeInfo(7, 2);
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.BINARY)
+ .as(LogicalTypeAnnotation.decimalType(2, 5)).named("value"), hiveTypeInfo, null, true,
+ null);
+ assertTrue(reader instanceof TypesFromDecimalPageReader);
+ }
+
+ @Test
+ public void testGetBinaryReaderNoOriginalType() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("string");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.BINARY).named("value"), hiveTypeInfo, null, true,
+ null);
+ assertTrue(reader instanceof DefaultParquetDataColumnReader);
+ }
+
+ @Test
+ public void testGetBinaryReaderJsonOriginalType() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("binary");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory.getDataColumnReaderByType(Types
+ .optional(PrimitiveTypeName.BINARY).as(LogicalTypeAnnotation.jsonType()).named("value"),
+ hiveTypeInfo, null, true, null);
+ assertTrue(reader instanceof DefaultParquetDataColumnReader);
+ }
+
+ @Test
+ public void testGetIntReader() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("int");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.INT32)
+ .as(LogicalTypeAnnotation.intType(32, false)).named("value"), hiveTypeInfo, null, true,
+ null);
+ assertTrue(reader instanceof TypesFromInt32PageReader);
+ }
+
+ @Test
+ public void testGetIntReaderNoOriginalType() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("int");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.INT32).named("value"), hiveTypeInfo, null, true,
+ null);
+ assertTrue(reader instanceof TypesFromInt32PageReader);
+ }
+
+ @Test
+ public void testGetInt64ReaderNoOriginalType() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("bigint");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory.getDataColumnReaderByType(
+ Types.optional(PrimitiveTypeName.INT64).named("value"), hiveTypeInfo, null, true, null);
+ assertTrue(reader instanceof TypesFromInt64PageReader);
+ }
+
+ @Test
+ public void testGetInt64Reader() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("bigint");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.INT64)
+ .as(LogicalTypeAnnotation.intType(64, false)).named("value"), hiveTypeInfo, null, true,
+ null);
+ assertTrue(reader instanceof TypesFromInt64PageReader);
+ }
+
+ @Test
+ public void testGetFloatReader() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("float");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.FLOAT).named("value"), hiveTypeInfo, null, true,
+ null);
+ assertTrue(reader instanceof TypesFromFloatPageReader);
+ }
+
+ @Test
+ public void testGetDoubleReader() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("double");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.DOUBLE).named("value"), hiveTypeInfo, null, true,
+ null);
+ assertTrue(reader instanceof TypesFromDoublePageReader);
+ }
+
+ @Test
+ public void testGetInt96Reader() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("timestamp");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.INT96).named("value"), hiveTypeInfo, null, true,
+ null);
+ assertTrue(reader instanceof TypesFromInt96PageReader);
+ }
+
+ @Test
+ public void testGetBooleanReader() throws Exception {
+ PrimitiveTypeInfo hiveTypeInfo = new PrimitiveTypeInfo();
+ hiveTypeInfo.setTypeName("boolean");
+ ParquetDataColumnReader reader = ParquetDataColumnReaderFactory
+ .getDataColumnReaderByType(Types.optional(PrimitiveTypeName.BOOLEAN).named("value"), hiveTypeInfo, null, true,
+ null);
+ assertTrue(reader instanceof TypesFromBooleanPageReader);
+ }
+}
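One detail worth noting about the decimal cases above: LogicalTypeAnnotation.decimalType takes (scale, precision), while Hive's DecimalTypeInfo constructor takes (precision, scale), so decimalType(2, 5) describes a decimal(5,2) Parquet column and new DecimalTypeInfo(7, 2) a decimal(7,2) Hive type. A minimal sketch of both argument orders (the class name is illustrative):

    import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
    import org.apache.parquet.schema.LogicalTypeAnnotation;
    import org.apache.parquet.schema.LogicalTypeAnnotation.DecimalLogicalTypeAnnotation;
    import org.apache.parquet.schema.PrimitiveType;
    import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
    import org.apache.parquet.schema.Types;

    public class DecimalAnnotationSketch {
      public static void main(String[] args) {
        // Parquet side: decimalType(scale, precision) -> decimal(5,2).
        PrimitiveType col = Types.optional(PrimitiveTypeName.BINARY)
            .as(LogicalTypeAnnotation.decimalType(2, 5)).named("value");
        DecimalLogicalTypeAnnotation decimal =
            (DecimalLogicalTypeAnnotation) col.getLogicalTypeAnnotation();
        System.out.println(decimal.getPrecision()); // 5
        System.out.println(decimal.getScale());     // 2

        // Hive side: DecimalTypeInfo(precision, scale) -> decimal(7,2).
        DecimalTypeInfo hiveType = new DecimalTypeInfo(7, 2);
        System.out.println(hiveType.getPrecision()); // 7
        System.out.println(hiveType.getScale());     // 2
      }
    }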
diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/package-info.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/package-info.java
new file mode 100644
index 0000000..29dc0f4
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/convert/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Test conversion between Parquet LogicalTypes and Hive types.
+ */
+package org.apache.hadoop.hive.ql.io.parquet.convert;
diff --git ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
index 48e8405..ec2cd8b 100644
--- ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
+++ ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out
@@ -4077,7 +4077,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: flights_tiny_parquet_partitioned_date
- Statistics: Num rows: 137 Data size: 13861 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 137 Data size: 11618 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -4087,7 +4087,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [5]
- Statistics: Num rows: 137 Data size: 13861 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 137 Data size: 11618 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
Group By Vectorization:
@@ -5120,7 +5120,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: flights_tiny_parquet_partitioned_timestamp
- Statistics: Num rows: 137 Data size: 11189 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 137 Data size: 9330 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
Select Operator
@@ -5130,7 +5130,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [5]
- Statistics: Num rows: 137 Data size: 11189 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 137 Data size: 9330 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
Group By Vectorization:
diff --git ql/src/test/results/clientpositive/parquet_stats.q.out ql/src/test/results/clientpositive/parquet_stats.q.out
index ea15e91..01aa206 100644
--- ql/src/test/results/clientpositive/parquet_stats.q.out
+++ ql/src/test/results/clientpositive/parquet_stats.q.out
@@ -47,8 +47,8 @@ Table Parameters:
bucketing_version 2
numFiles 1
numRows 2
- rawDataSize 146
- totalSize 500
+ rawDataSize 86
+ totalSize 544
#### A masked pattern was here ####
# Storage Information
diff --git ql/src/test/results/clientpositive/spark/parquet_join.q.out ql/src/test/results/clientpositive/spark/parquet_join.q.out
index e86c5f6..cd2131e 100644
--- ql/src/test/results/clientpositive/spark/parquet_join.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_join.q.out
@@ -87,39 +87,39 @@ STAGE PLANS:
TableScan
alias: p1
filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 73 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 73 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 73 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 73 Basic stats: COMPLETE Column stats: NONE
Map 3
Map Operator Tree:
TableScan
alias: p2
filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 114 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 114 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), myvalue (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 114 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 114 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string)
Reducer 2
Reduce Operator Tree:
@@ -130,14 +130,14 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col2
- Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -186,14 +186,14 @@ STAGE PLANS:
TableScan
alias: p1
filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 73 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 73 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 73 Basic stats: COMPLETE Column stats: NONE
Spark HashTable Sink Operator
keys:
0 _col0 (type: int)
@@ -210,14 +210,14 @@ STAGE PLANS:
TableScan
alias: p2
filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 114 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 114 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), myvalue (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 199 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 114 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -227,14 +227,14 @@ STAGE PLANS:
outputColumnNames: _col2
input vertices:
0 Map 1
- Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 132 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -321,14 +321,14 @@ STAGE PLANS:
TableScan
alias: p2
filterExpr: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 147 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 147 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: int), value2 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 245 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 147 Basic stats: COMPLETE Column stats: NONE
Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
@@ -336,14 +336,14 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col1, _col3
- Statistics: Num rows: 2 Data size: 158 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 93 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 158 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 93 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 158 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 93 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
index a830f95..013ca68 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out
@@ -84,7 +84,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -94,7 +94,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(ctinyint)
Group By Vectorization:
@@ -227,11 +227,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1)
minReductionHashAggr: 0.99
@@ -400,7 +400,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -410,7 +410,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3]
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(cbigint)
Group By Vectorization:
@@ -543,11 +543,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1)
minReductionHashAggr: 0.99
@@ -716,7 +716,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -726,7 +726,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [4]
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(cfloat)
Group By Vectorization:
@@ -859,11 +859,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cfloat (type: float), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1)
minReductionHashAggr: 0.99
@@ -1025,7 +1025,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -1034,7 +1034,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterDecimalColNotEqualDecimalScalar(col 13:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterDoubleColLessDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterStringColLikeStringScalar(col 7:string, pattern %b%))
predicate: ((CAST( cint AS decimal(13,3)) <> 79.553) or (UDFToDouble(cbigint) < cdouble) or (cstring2 like '%b%')) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
@@ -1043,7 +1043,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3, 4, 0, 15, 18]
selectExpressions: CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), count(_col0), sum(_col4), sum(_col3), count(), sum(_col1), min(_col2)
Group By Vectorization:
@@ -1214,14 +1214,14 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cstring1 like 'a%') or (cstring1 like 'b%') or (cstring1 like 'c%') or ((length(cstring1) < 50) and (cstring1 like '%n') and (length(cstring1) > 0))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: ((cstring1 like 'a%') or (cstring1 like 'b%') or (cstring1 like 'c%') or ((length(cstring1) < 50) and (cstring1 like '%n') and (length(cstring1) > 0))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
minReductionHashAggr: 0.99
@@ -30033,22 +30033,22 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean)
- Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 445074 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 445074 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 445074 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30157,22 +30157,22 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean)
- Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 445074 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 445074 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 445074 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30280,22 +30280,22 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30407,25 +30407,25 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (cstring1) IN ('biology', 'history', 'topology') (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Filter Operator
isSamplingPred: false
predicate: (cstring1) IN ('biology', 'history', 'topology') (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: cstring1 (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col1 (type: bigint)
auto parallelism: false
@@ -30492,16 +30492,16 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: bigint), _col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: string)
null sort order: z
sort order: +
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col0 (type: bigint)
auto parallelism: false
@@ -30512,13 +30512,13 @@ STAGE PLANS:
Select Operator
expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out
index c57a010..095a701 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out
@@ -64,7 +64,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cboolean1 < 0) or (cbigint < UDFToLong(ctinyint)) or (UDFToLong(cint) > cbigint) or ((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -73,7 +73,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterLongColLessLongScalar(col 10:boolean, val 0), FilterLongColLessLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint), FilterLongColGreaterLongColumn(col 2:bigint, col 3:bigint)(children: col 2:int), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterLongColGreaterLongScalar(col 11:boolean, val 0)))
predicate: ((cboolean1 < 0) or (cbigint < UDFToLong(ctinyint)) or (UDFToLong(cint) > cbigint) or ((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cfloat (type: float), cint (type: int), cdouble (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), (cdouble * cdouble) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -82,7 +82,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 4, 2, 5, 14, 17, 18]
selectExpressions: CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 18:double
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col1), max(_col0), max(_col2), sum(_col6), sum(_col3), count(_col3), count(_col2)
Group By Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out
index 65fa5c1..9ce20c8 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_10.q.out
@@ -68,7 +68,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -77,7 +77,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimalColLessEqualDecimalScalar(col 14:decimal(6,2), val -5638.15)(children: CastLongToDecimal(col 0:tinyint) -> 14:decimal(6,2))), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 1:smallint) -> 15:decimal(11,4)), FilterStringColLikeStringScalar(col 6:string, pattern %a))))
predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean)
- Statistics: Num rows: 9557 Data size: 461790 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9557 Data size: 461542 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0D) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639D - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -86,13 +86,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [5, 8, 0, 10, 6, 16, 18, 21, 22, 24, 25, 26, 28, 31, 33]
selectExpressions: DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleColAddDoubleColumn(col 5:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColModuloDoubleScalar(col 20:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5:double) -> 22:double, DoubleColModuloDoubleColumn(col 23:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double) -> 24:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 25:smallint, DoubleColUnaryMinus(col 5:double) -> 26:double, LongColMultiplyLongColumn(col 3:bigint, col 27:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 27:smallint) -> 28:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 30:double)(children: DoubleColAddDoubleColumn(col 5:double, col 29:double)(children: CastLongToDouble(col 1:smallint) -> 29:double) -> 30:double) -> 31:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 5:double) -> 32:double) -> 33:double
- Statistics: Num rows: 9557 Data size: 461790 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9557 Data size: 461542 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 9557 Data size: 461790 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9557 Data size: 461542 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_11.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_11.q.out
index 61782ad..5f1aac5 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_11.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_11.q.out
@@ -50,7 +50,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -59,7 +59,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringGroupColumn(col 7:string, col 6:string), FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterStringColLikeStringScalar(col 6:string, pattern %a)))
predicate: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean)
- Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 445074 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639D) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0D) (type: double), (cdouble * -5638.15D) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
@@ -68,13 +68,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [6, 10, 5, 8, 13, 14, 15, 17, 18]
selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 1:int)(children: col 1:smallint) -> 13:int, DoubleColSubtractDoubleScalar(col 5:double, val 9763215.5639) -> 14:double, DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleColAddDoubleScalar(col 16:double, val 6981.0)(children: DoubleColUnaryMinus(col 5:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleScalar(col 5:double, val -5638.15) -> 18:double
- Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 445074 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 445074 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out
index 30b6842..9b0560a 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out
@@ -87,7 +87,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (ctimestamp1 is null and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -96,7 +96,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %a), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11:boolean, val 1), FilterLongColGreaterEqualLongColumn(col 3:bigint, col 1:bigint)(children: col 1:smallint))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10:boolean, col 11:boolean), FilterLongColNotEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint)))
predicate: (ctimestamp1 is null and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint))) (type: boolean)
- Statistics: Num rows: 3754 Data size: 181391 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3754 Data size: 181294 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -105,7 +105,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3, 10, 6, 5, 13, 16, 17]
selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 17:double
- Statistics: Num rows: 3754 Data size: 181391 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3754 Data size: 181294 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col0), sum(_col5), sum(_col4), sum(_col3), count(_col3), sum(_col0), sum(_col6)
Group By Vectorization:
@@ -120,7 +120,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 3754 Data size: 181391 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3754 Data size: 181294 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean)
null sort order: aaaa
@@ -130,7 +130,7 @@ STAGE PLANS:
className: VectorReduceSinkMultiKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 3754 Data size: 181391 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3754 Data size: 181294 Basic stats: COMPLETE Column stats: NONE
value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double)
Execution mode: vectorized
Map Vectorization:
@@ -164,7 +164,7 @@ STAGE PLANS:
keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
- Statistics: Num rows: 1877 Data size: 90695 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1877 Data size: 90647 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), (_col7 / _col8) (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col9 (type: bigint), ((_col7 / _col8) / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), power(((_col10 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19
@@ -173,7 +173,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [1, 3, 2, 0, 11, 12, 4, 13, 21, 23, 26, 27, 29, 31, 9, 34, 38, 43, 48]
selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 11:double, LongColUnaryMinus(col 1:bigint) -> 12:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 13:bigint, FuncPowerDoubleToDouble(col 20:double)(children: DoubleColDivideLongColumn(col 16:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 14:double) -> 15:double) -> 16:double, IfExprNullCondExpr(col 17:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 4:bigint, val 1) -> 17:boolean, LongColSubtractLongScalar(col 4:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 20:double) -> 21:double, DoubleColDivideDoubleScalar(col 22:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 22:double) -> 23:double, DoubleColUnaryMinus(col 25:double)(children: DoubleColDivideDoubleScalar(col 24:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 24:double) -> 25:double) -> 26:double, DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 27:double, DoubleColUnaryMinus(col 28:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 28:double) -> 29:double, DecimalScalarAddDecimalColumn(val -5638.15, col 30:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 30:decimal(19,0)) -> 31:decimal(22,2), DoubleColDivideDoubleColumn(col 32:double, col 33:double)(children: DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 32:double, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 33:double) -> 34:double, DoubleColUnaryMinus(col 37:double)(children: DoubleColUnaryMinus(col 36:double)(children: DoubleColDivideDoubleScalar(col 35:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 35:double) -> 36:double) -> 37:double) -> 38:double, DoubleColAddDoubleColumn(col 40:double, col 42:double)(children: DoubleColDivideDoubleScalar(col 39:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 39:double) -> 40:double, DoubleColUnaryMinus(col 41:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 41:double) -> 42:double) -> 43:double, FuncPowerDoubleToDouble(col 47:double)(children: DoubleColDivideLongColumn(col 46:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 45:double)(children: DoubleColDivideLongColumn(col 44:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 44:double) -> 45:double) -> 46:double) -> 47:double) -> 48:double
- Statistics: Num rows: 1877 Data size: 90695 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1877 Data size: 90647 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string)
null sort order: zzz
@@ -182,7 +182,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1877 Data size: 90695 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1877 Data size: 90647 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double)
Reducer 3
Execution mode: vectorized
@@ -201,13 +201,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [1, 3, 2, 19, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 11, 16, 17, 18]
selectExpressions: ConstantVectorExpression(val null) -> 19:timestamp
- Statistics: Num rows: 1877 Data size: 90695 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1877 Data size: 90647 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 1877 Data size: 90695 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1877 Data size: 90647 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out
index 875348c..921c8b9 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out
@@ -89,7 +89,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28789.0D) and (UDFToDouble(ctimestamp2) <> -28788.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -98,7 +98,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val 3569.0), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 10.175), FilterLongColNotEqualLongScalar(col 10:boolean, val 1)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28789.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val -28788.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDecimalColLessDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 0:tinyint) -> 15:decimal(11,4))))
predicate: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28789.0D) and (UDFToDouble(ctimestamp2) <> -28788.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean)
- Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2730 Data size: 131841 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
@@ -107,7 +107,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 16, 17, 20]
selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 18:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double
- Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2730 Data size: 131841 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1)
Group By Vectorization:
@@ -122,7 +122,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
- Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2730 Data size: 131841 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
null sort order: aaaaa
@@ -132,7 +132,7 @@ STAGE PLANS:
className: VectorReduceSinkMultiKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2730 Data size: 131841 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint)
Execution mode: vectorized
Map Vectorization:
@@ -166,7 +166,7 @@ STAGE PLANS:
keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
@@ -175,7 +175,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 21, 22, 23, 28, 29, 34, 38, 40, 43, 13, 49, 14]
selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 20:double)(children: CastLongToDouble(col 19:tinyint)(children: LongColAddLongColumn(col 18:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 18:tinyint) -> 19:tinyint) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 6:double) -> 22:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 23:float, FuncPowerDoubleToDouble(col 27:double)(children: DoubleColDivideLongColumn(col 26:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 24:double) -> 25:double) -> 26:double) -> 27:double) -> 28:double, DoubleColUnaryMinus(col 6:double) -> 29:double, FuncPowerDoubleToDouble(col 33:double)(children: DoubleColDivideLongColumn(col 32:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 30:double) -> 31:double) -> 32:double) -> 33:double) -> 34:double, DecimalColSubtractDecimalScalar(col 37:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 36:tinyint)(children: LongColAddLongColumn(col 35:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 35:tinyint) -> 36:tinyint) -> 37:decimal(3,0)) -> 38:decimal(7,3), DoubleColUnaryMinus(col 39:double)(children: DoubleColUnaryMinus(col 6:double) -> 39:double) -> 40:double, DoubleScalarDivideDoubleColumn(val -26.28, col 42:double)(children: DoubleColUnaryMinus(col 41:double)(children: DoubleColUnaryMinus(col 6:double) -> 41:double) -> 42:double) -> 43:double, DoubleColDivideDoubleColumn(col 47:double, col 48:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 46:double)(children: CastLongToDouble(col 45:tinyint)(children: LongColAddLongColumn(col 44:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 44:tinyint) -> 45:tinyint) -> 46:double) -> 47:double, CastLongToDouble(col 1:tinyint) -> 48:double) -> 49:double
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
null sort order: zzzzzzzzzzzzzzzzzzzzz
@@ -184,7 +184,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Reducer 3
Execution mode: vectorized
@@ -202,7 +202,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20]
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 40
Limit Vectorization:
@@ -425,7 +425,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28801.388D) and (UDFToDouble(ctimestamp2) <> -28801.336D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -434,7 +434,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val 3569.0), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 10.175), FilterLongColNotEqualLongScalar(col 10:boolean, val 1)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28801.388)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val -28801.336)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDecimalColLessDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 0:tinyint) -> 15:decimal(11,4))))
predicate: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28801.388D) and (UDFToDouble(ctimestamp2) <> -28801.336D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean)
- Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2730 Data size: 131841 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
@@ -443,7 +443,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 16, 17, 20]
selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 18:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double
- Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2730 Data size: 131841 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1)
Group By Vectorization:
@@ -458,7 +458,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
- Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2730 Data size: 131841 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
null sort order: aaaaa
@@ -468,7 +468,7 @@ STAGE PLANS:
className: VectorReduceSinkMultiKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2730 Data size: 131912 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2730 Data size: 131841 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint)
Execution mode: vectorized
Map Vectorization:
@@ -502,7 +502,7 @@ STAGE PLANS:
keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
@@ -511,7 +511,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 21, 22, 23, 28, 29, 34, 38, 40, 43, 13, 49, 14]
selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 20:double)(children: CastLongToDouble(col 19:tinyint)(children: LongColAddLongColumn(col 18:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 18:tinyint) -> 19:tinyint) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 6:double) -> 22:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 23:float, FuncPowerDoubleToDouble(col 27:double)(children: DoubleColDivideLongColumn(col 26:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 24:double) -> 25:double) -> 26:double) -> 27:double) -> 28:double, DoubleColUnaryMinus(col 6:double) -> 29:double, FuncPowerDoubleToDouble(col 33:double)(children: DoubleColDivideLongColumn(col 32:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 30:double) -> 31:double) -> 32:double) -> 33:double) -> 34:double, DecimalColSubtractDecimalScalar(col 37:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 36:tinyint)(children: LongColAddLongColumn(col 35:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 35:tinyint) -> 36:tinyint) -> 37:decimal(3,0)) -> 38:decimal(7,3), DoubleColUnaryMinus(col 39:double)(children: DoubleColUnaryMinus(col 6:double) -> 39:double) -> 40:double, DoubleScalarDivideDoubleColumn(val -26.28, col 42:double)(children: DoubleColUnaryMinus(col 41:double)(children: DoubleColUnaryMinus(col 6:double) -> 41:double) -> 42:double) -> 43:double, DoubleColDivideDoubleColumn(col 47:double, col 48:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 46:double)(children: CastLongToDouble(col 45:tinyint)(children: LongColAddLongColumn(col 44:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 44:tinyint) -> 45:tinyint) -> 46:double) -> 47:double, CastLongToDouble(col 1:tinyint) -> 48:double) -> 49:double
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
null sort order: zzzzzzzzzzzzzzzzzzzzz
@@ -520,7 +520,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Reducer 3
Execution mode: vectorized
@@ -538,7 +538,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20]
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 40
Limit Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out
index 8ef326c..6fb575d 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out
@@ -89,7 +89,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -98,7 +98,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 0:bigint, col 3:bigint)(children: col 0:tinyint), FilterDoubleColLessDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val -257), FilterDoubleColLessDoubleColumn(col 4:float, col 14:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 14:float)), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleColumn(col 15:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 15:double), FilterTimestampColLessTimestampColumn(col 9:timestamp, col 8:timestamp)))
predicate: ((UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1))) (type: boolean)
- Statistics: Num rows: 606 Data size: 29281 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 606 Data size: 29265 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double), ((- (-26.28D + cdouble)) * (- (-26.28D + cdouble))) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
@@ -107,7 +107,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [8, 4, 6, 10, 5, 17, 22, 4, 23]
selectExpressions: DoubleColUnaryMinus(col 16:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 19:double, col 21:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 20:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 20:double) -> 21:double) -> 22:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 23:double
- Statistics: Num rows: 606 Data size: 29281 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 606 Data size: 29265 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col6), sum(_col5), count(_col5), max(_col1), sum(_col8), sum(_col7), count(_col1)
Group By Vectorization:
@@ -122,7 +122,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 606 Data size: 29281 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 606 Data size: 29265 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean)
null sort order: aaaaa
@@ -132,7 +132,7 @@ STAGE PLANS:
className: VectorReduceSinkMultiKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 606 Data size: 29281 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 606 Data size: 29265 Basic stats: COMPLETE Column stats: NONE
value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint)
Execution mode: vectorized
Map Vectorization:
@@ -166,7 +166,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
- Statistics: Num rows: 303 Data size: 14640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 303 Data size: 14632 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (_col1 * -26.28) (type: float), _col8 (type: float), (- _col1) (type: float), (- _col8) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / _col11), 0.5) (type: double), _col11 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5)) (type: double), (UDFToDouble(_col1) - _col2) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (((_col9 - ((_col10 * _col10) / _col11)) / _col11) % 10.175D) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END) (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
@@ -175,7 +175,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [3, 1, 0, 4, 2, 12, 14, 22, 23, 8, 24, 25, 28, 33, 11, 37, 46, 47, 51, 56, 63, 65]
selectExpressions: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 12:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 13:double) -> 14:double, FuncPowerDoubleToDouble(col 21:double)(children: DoubleColDivideLongColumn(col 17:double, col 20:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 16:double)(children: DoubleColDivideLongColumn(col 15:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 15:double) -> 16:double) -> 17:double, IfExprNullCondExpr(col 18:boolean, null, col 19:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 18:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 19:bigint) -> 20:bigint) -> 21:double) -> 22:double, DoubleColMultiplyDoubleScalar(col 1:float, val -26.280000686645508) -> 23:float, DoubleColUnaryMinus(col 1:float) -> 24:float, DoubleColUnaryMinus(col 8:float) -> 25:float, DoubleColDivideDoubleScalar(col 27:double, val 10.175)(children: DoubleColUnaryMinus(col 26:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 26:double) -> 27:double) -> 28:double, FuncPowerDoubleToDouble(col 32:double)(children: DoubleColDivideLongColumn(col 31:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 29:double) -> 30:double) -> 31:double) -> 32:double) -> 33:double, DoubleColUnaryMinus(col 36:double)(children: DoubleColDivideDoubleScalar(col 35:double, val 10.175)(children: DoubleColUnaryMinus(col 34:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 34:double) -> 35:double) -> 36:double) -> 37:double, DoubleScalarModuloDoubleColumn(val -1.389, col 45:double)(children: FuncPowerDoubleToDouble(col 44:double)(children: DoubleColDivideLongColumn(col 40:double, col 43:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 39:double)(children: DoubleColDivideLongColumn(col 38:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 38:double) -> 39:double) -> 40:double, IfExprNullCondExpr(col 41:boolean, null, col 42:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 41:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 42:bigint) -> 43:bigint) -> 44:double) -> 45:double) -> 46:double, DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 47:double, DoubleColDivideLongColumn(col 50:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 49:double)(children: DoubleColDivideLongColumn(col 48:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 48:double) -> 49:double) -> 50:double) -> 51:double, DoubleColModuloDoubleScalar(col 55:double, val 10.175)(children: DoubleColDivideLongColumn(col 54:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 53:double)(children: DoubleColDivideLongColumn(col 52:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 52:double) -> 53:double) -> 54:double) -> 55:double) -> 56:double, DoubleColDivideLongColumn(col 59:double, col 62:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 58:double)(children: DoubleColDivideLongColumn(col 57:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 57:double) -> 58:double) -> 59:double, IfExprNullCondExpr(col 60:boolean, null, col 61:bigint)(children: LongColEqualLongScalar(col 11:bigint, val 1) -> 60:boolean, LongColSubtractLongScalar(col 11:bigint, val 1) -> 61:bigint) -> 62:bigint) -> 63:double, DoubleColUnaryMinus(col 64:double)(children: DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 64:double) -> 65:double
- Statistics: Num rows: 303 Data size: 14640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 303 Data size: 14632 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp)
null sort order: zzzz
@@ -184,7 +184,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 303 Data size: 14640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 303 Data size: 14632 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double)
Reducer 3
Execution mode: vectorized
@@ -202,13 +202,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [3, 1, 0, 4, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
- Statistics: Num rows: 303 Data size: 14640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 303 Data size: 14632 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 303 Data size: 14640 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 303 Data size: 14632 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out
index f124b67..88095ba 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out
@@ -85,7 +85,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cstring1 like '10%') or (cstring2 like '%ss%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -94,7 +94,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern 10%), FilterStringColLikeStringScalar(col 7:string, pattern %ss%), FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2:int, val -75), FilterLongColEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint), FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -3728.0)))
predicate: ((cstring1 like '10%') or (cstring2 like '%ss%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
@@ -103,7 +103,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [4, 10, 5, 6, 0, 2, 8, 4, 13, 14, 17, 18, 21]
selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, CastLongToDouble(col 2:int) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 2:int) -> 19:double, CastLongToDouble(col 2:int) -> 20:double) -> 21:double
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col8), sum(_col7), count(_col0), min(_col2), sum(_col10), sum(_col9), count(_col4), sum(_col12), sum(_col11), count(_col5)
Group By Vectorization:
@@ -118,7 +118,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
null sort order: aaaaaaa
@@ -128,7 +128,7 @@ STAGE PLANS:
className: VectorReduceSinkMultiKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint)
Execution mode: vectorized
Map Vectorization:
@@ -151,16 +151,16 @@ STAGE PLANS:
keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / CASE WHEN ((_col9 = 1L)) THEN (null) ELSE ((_col9 - 1)) END), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / CASE WHEN ((_col16 = 1L)) THEN (null) ELSE ((_col16 - 1)) END) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
null sort order: zzzzzzz
sort order: +++++++
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double)
Reducer 3
Reduce Vectorization:
@@ -171,10 +171,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out
index 6658e64..cc5275d 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out
@@ -62,7 +62,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -71,7 +71,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -1.389), FilterStringGroupColLessStringScalar(col 6:string, val a)))
predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean)
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
@@ -80,7 +80,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [6, 5, 8, 13]
selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1)
Group By Vectorization:
@@ -95,7 +95,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
null sort order: aaa
@@ -105,7 +105,7 @@ STAGE PLANS:
className: VectorReduceSinkMultiKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double)
Execution mode: vectorized
Map Vectorization:
@@ -139,7 +139,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 2048 Data size: 98958 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 98905 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
@@ -148,13 +148,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 17, 26, 36, 6, 37, 39, 47]
selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 16:double)(children: DoubleColDivideLongColumn(col 12:double, col 15:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 11:double) -> 12:double, IfExprNullCondExpr(col 13:boolean, null, col 14:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 14:bigint) -> 15:bigint) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 25:double)(children: FuncPowerDoubleToDouble(col 24:double)(children: DoubleColDivideLongColumn(col 20:double, col 23:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 18:double) -> 19:double) -> 20:double, IfExprNullCondExpr(col 21:boolean, null, col 22:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 21:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 22:bigint) -> 23:bigint) -> 24:double) -> 25:double) -> 26:double, DoubleColMultiplyDoubleColumn(col 34:double, col 35:double)(children: FuncPowerDoubleToDouble(col 33:double)(children: DoubleColDivideLongColumn(col 29:double, col 32:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 27:double) -> 28:double) -> 29:double, IfExprNullCondExpr(col 30:boolean, null, col 31:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 30:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 31:bigint) -> 32:bigint) -> 33:double) -> 34:double, CastLongToDouble(col 3:bigint) -> 35:double) -> 36:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 37:double, DecimalColDivideDecimalScalar(col 38:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 38:decimal(19,0)) -> 39:decimal(28,6), FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 42:double, col 45:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 41:double)(children: DoubleColDivideLongColumn(col 40:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 40:double) -> 41:double) -> 42:double, IfExprNullCondExpr(col 43:boolean, null, col 44:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 43:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 44:bigint) -> 45:bigint) -> 46:double) -> 47:double
- Statistics: Num rows: 2048 Data size: 98958 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 98905 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 2048 Data size: 98958 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 98905 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out
index f431947..8829f80 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out
@@ -70,7 +70,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cbigint > -23L) and ((ctinyint >= 33Y) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble)) and ((cdouble <> 988888.0D) or (CAST( cint AS decimal(13,3)) > -863.257))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -79,7 +79,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val -23), FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0:tinyint, val 33), FilterLongColGreaterEqualLongColumn(col 1:bigint, col 3:bigint)(children: col 1:smallint), FilterDoubleColEqualDoubleColumn(col 4:double, col 5:double)(children: col 4:float)), FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5:double, val 988888.0), FilterDecimalColGreaterDecimalScalar(col 13:decimal(13,3), val -863.257)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3))))
predicate: ((cbigint > -23L) and ((ctinyint >= 33Y) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble)) and ((cdouble <> 988888.0D) or (CAST( cint AS decimal(13,3)) > -863.257))) (type: boolean)
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58D + (- (- cdouble))) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
@@ -88,7 +88,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [4, 6, 2, 8, 5, 3, 15, 16, 17, 20, 22, 24, 26, 29]
selectExpressions: DoubleColDivideDoubleColumn(col 4:double, col 14:double)(children: col 4:float, CastLongToDouble(col 0:tinyint) -> 14:double) -> 15:double, LongColModuloLongColumn(col 2:bigint, col 3:bigint)(children: col 2:int) -> 16:bigint, DoubleColUnaryMinus(col 5:double) -> 17:double, DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: DoubleColDivideDoubleColumn(col 4:double, col 18:double)(children: col 4:float, CastLongToDouble(col 0:tinyint) -> 18:double) -> 19:double) -> 20:double, DoubleColDivideDoubleColumn(col 5:double, col 21:double)(children: CastLongToDouble(col 2:int) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColUnaryMinus(col 5:double) -> 23:double) -> 24:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 25:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 25:decimal(19,0)) -> 26:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 28:double)(children: DoubleColUnaryMinus(col 27:double)(children: DoubleColUnaryMinus(col 5:double) -> 27:double) -> 28:double) -> 29:double
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col5 (type: bigint), _col0 (type: float)
null sort order: zz
@@ -97,7 +97,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double)
Execution mode: vectorized
Map Vectorization:
@@ -125,13 +125,13 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 2, 3, 4, 5, 0, 6, 7, 8, 9, 10, 11, 12, 13]
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out
index 1a2ffd0..ae8bf58 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out
@@ -68,7 +68,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -77,7 +77,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 14:double, val -10669.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterLongColLessLongScalar(col 2:int, val 359))), FilterExprAndExpr(children: FilterTimestampColLessTimestampColumn(col 8:timestamp, col 9:timestamp), FilterStringColLikeStringScalar(col 7:string, pattern b%), FilterDoubleColLessEqualDoubleScalar(col 4:float, val -5638.14990234375)))
predicate: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean)
- Statistics: Num rows: 4778 Data size: 230870 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4778 Data size: 230747 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: csmallint (type: smallint), cfloat (type: float), cbigint (type: bigint), ctinyint (type: tinyint), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -86,7 +86,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [1, 4, 3, 0, 5, 15, 18]
selectExpressions: CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double
- Statistics: Num rows: 4778 Data size: 230870 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4778 Data size: 230747 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count(), min(_col3), sum(_col4), count(_col4)
Group By Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out
index 64be366..e96c587 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out
@@ -73,7 +73,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -82,7 +82,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 13:double), FilterDecimalColGreaterEqualDecimalScalar(col 14:decimal(8,3), val 79.553)(children: CastLongToDecimal(col 1:smallint) -> 14:decimal(8,3)), FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp)), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 15:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 15:float), FilterDecimalColNotEqualDecimalScalar(col 16:decimal(22,3), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 16:decimal(22,3)), FilterDoubleColEqualDoubleScalar(col 17:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 17:double)))
predicate: (((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2)) or ((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D))) (type: boolean)
- Statistics: Num rows: 2503 Data size: 120943 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2503 Data size: 120879 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
@@ -91,7 +91,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [1, 0, 4, 2, 18, 21, 22, 25, 4, 26, 27, 30]
selectExpressions: CastLongToDouble(col 1:smallint) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 1:smallint) -> 19:double, CastLongToDouble(col 1:smallint) -> 20:double) -> 21:double, CastLongToDouble(col 0:tinyint) -> 22:double, DoubleColMultiplyDoubleColumn(col 23:double, col 24:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double, CastLongToDouble(col 0:tinyint) -> 24:double) -> 25:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 26:double, CastLongToDouble(col 2:int) -> 27:double, DoubleColMultiplyDoubleColumn(col 28:double, col 29:double)(children: CastLongToDouble(col 2:int) -> 28:double, CastLongToDouble(col 2:int) -> 29:double) -> 30:double
- Statistics: Num rows: 2503 Data size: 120943 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2503 Data size: 120879 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col7), sum(_col6), count(_col1), sum(_col9), sum(_col8), count(_col2), sum(_col2), sum(_col3), count(_col3), sum(_col11), sum(_col10)
Group By Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out
index 1196b71..255f0ad 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out
@@ -68,7 +68,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((UDFToInteger(csmallint) >= cint) or ((cbigint <> -563L) and ((UDFToLong(ctinyint) <> cbigint) or (cdouble <= -3728.0D))) or ((UDFToInteger(ctinyint) <= -89010) and (cdouble > 79.553D))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -77,7 +77,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 1:int, col 2:int)(children: col 1:smallint), FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 3:bigint, val -563), FilterExprOrExpr(children: FilterLongColNotEqualLongColumn(col 0:bigint, col 3:bigint)(children: col 0:tinyint), FilterDoubleColLessEqualDoubleScalar(col 5:double, val -3728.0))), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 0:int, val -89010)(children: col 0:tinyint), FilterDoubleColGreaterDoubleScalar(col 5:double, val 79.553)))
predicate: ((UDFToInteger(csmallint) >= cint) or ((cbigint <> -563L) and ((UDFToLong(ctinyint) <> cbigint) or (cdouble <= -3728.0D))) or ((UDFToInteger(ctinyint) <= -89010) and (cdouble > 79.553D))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int), cdouble (type: double), ctinyint (type: tinyint), (cdouble * cdouble) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
@@ -86,7 +86,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [2, 5, 0, 13]
selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), sum(_col3), sum(_col1), count(_col1), min(_col2)
Group By Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out
index 497367c..ceff0e8 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out
@@ -62,7 +62,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (((cstring1 like '%b%') and cboolean2 is not null) or ((UDFToDouble(ctinyint) = cdouble) and (cstring2 like 'a') and ctimestamp2 is not null)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -71,7 +71,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %b%), SelectColumnIsNotNull(col 11:boolean)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterStringColLikeStringScalar(col 7:string, pattern a), SelectColumnIsNotNull(col 9:timestamp)))
predicate: (((cstring1 like '%b%') and cboolean2 is not null) or ((UDFToDouble(ctinyint) = cdouble) and (cstring2 like 'a') and ctimestamp2 is not null)) (type: boolean)
- Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 445074 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int)
outputColumnNames: ctinyint, csmallint, cint
@@ -79,7 +79,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 9216 Data size: 445313 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9216 Data size: 445074 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint)
Group By Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out
index 2f00044..a9b48fc 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_6.q.out
@@ -62,7 +62,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((((cboolean1 <= 0) and (cboolean2 >= cboolean1)) or (((cstring2 like '%a') or (cfloat <= -257.0)) and cbigint is not null)) and (ctinyint <> 0Y)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -71,7 +71,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 10:boolean, val 0), FilterLongColGreaterEqualLongColumn(col 11:boolean, col 10:boolean)), FilterExprAndExpr(children: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %a), FilterDoubleColLessEqualDoubleScalar(col 4:float, val -257.0)), SelectColumnIsNotNull(col 3:bigint))), FilterLongColNotEqualLongScalar(col 0:tinyint, val 0))
predicate: ((((cboolean1 <= 0) and (cboolean2 >= cboolean1)) or (((cstring2 like '%a') or (cfloat <= -257.0)) and cbigint is not null)) and (ctinyint <> 0Y)) (type: boolean)
- Statistics: Num rows: 11605 Data size: 560748 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11605 Data size: 560448 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cboolean1 (type: boolean), cfloat (type: float), cstring1 (type: string), (988888 * UDFToInteger(csmallint)) (type: int), (- csmallint) (type: smallint), (- cfloat) (type: float), (-26.28D / UDFToDouble(cfloat)) (type: double), (cfloat * 359.0) (type: float), (cint % UDFToInteger(ctinyint)) (type: int), (- cdouble) (type: double), (UDFToInteger(ctinyint) - -75) (type: int), (762 * (cint % UDFToInteger(ctinyint))) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
@@ -80,13 +80,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [10, 4, 6, 13, 14, 15, 16, 17, 18, 19, 20, 22]
selectExpressions: LongScalarMultiplyLongColumn(val 988888, col 1:int)(children: col 1:smallint) -> 13:int, LongColUnaryMinus(col 1:smallint) -> 14:smallint, DoubleColUnaryMinus(col 4:float) -> 15:float, DoubleScalarDivideDoubleColumn(val -26.28, col 4:double)(children: col 4:float) -> 16:double, DoubleColMultiplyDoubleScalar(col 4:float, val 359.0) -> 17:float, LongColModuloLongColumn(col 2:int, col 0:int)(children: col 0:tinyint) -> 18:int, DoubleColUnaryMinus(col 5:double) -> 19:double, LongColSubtractLongScalar(col 0:int, val -75)(children: col 0:tinyint) -> 20:int, LongScalarMultiplyLongColumn(val 762, col 21:int)(children: LongColModuloLongColumn(col 2:int, col 0:int)(children: col 0:tinyint) -> 21:int) -> 22:int
- Statistics: Num rows: 11605 Data size: 560748 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11605 Data size: 560448 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 11605 Data size: 560748 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11605 Data size: 560448 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
index c9ae0c9..9cd2f50 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
@@ -76,7 +76,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28815.0D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -85,7 +85,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 988888.0), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28815.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 3569.0))), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 14:double, val -28800.0)(children: CastTimestampToDouble(col 8:timestamp) -> 14:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterLongColNotEqualLongScalar(col 0:tinyint, val 0))
predicate: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28815.0D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean)
- Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263731 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -94,7 +94,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 15, 16, 17, 18, 20, 22, 23, 24, 26]
selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 15:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 16:int, LongColUnaryMinus(col 1:smallint) -> 17:smallint, LongColUnaryMinus(col 0:tinyint) -> 18:tinyint, LongColAddLongScalar(col 19:int, val 17)(children: LongColUnaryMinus(col 0:tinyint) -> 19:tinyint) -> 20:int, LongColMultiplyLongColumn(col 3:bigint, col 21:bigint)(children: LongColUnaryMinus(col 1:smallint) -> 21:smallint) -> 22:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 23:int, LongColUnaryMinus(col 0:tinyint) -> 24:tinyint, LongColModuloLongColumn(col 25:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 25:tinyint) -> 26:tinyint
- Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263731 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
null sort order: zzzzzzzzzzzzzzz
@@ -103,7 +103,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263731 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -131,7 +131,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14]
- Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263731 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 25
Limit Vectorization:
@@ -314,7 +314,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28792.315D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -323,7 +323,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 988888.0), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28792.315)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 3569.0))), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 14:double, val -28800.0)(children: CastTimestampToDouble(col 8:timestamp) -> 14:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterLongColNotEqualLongScalar(col 0:tinyint, val 0))
predicate: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28792.315D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean)
- Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263731 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -332,7 +332,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 15, 16, 17, 18, 20, 22, 23, 24, 26]
selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 15:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 16:int, LongColUnaryMinus(col 1:smallint) -> 17:smallint, LongColUnaryMinus(col 0:tinyint) -> 18:tinyint, LongColAddLongScalar(col 19:int, val 17)(children: LongColUnaryMinus(col 0:tinyint) -> 19:tinyint) -> 20:int, LongColMultiplyLongColumn(col 3:bigint, col 21:bigint)(children: LongColUnaryMinus(col 1:smallint) -> 21:smallint) -> 22:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 23:int, LongColUnaryMinus(col 0:tinyint) -> 24:tinyint, LongColModuloLongColumn(col 25:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 25:tinyint) -> 26:tinyint
- Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263731 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint)
null sort order: zzzzzzzzzzzzzzz
@@ -341,7 +341,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263731 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -369,7 +369,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14]
- Statistics: Num rows: 5461 Data size: 263873 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5461 Data size: 263731 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 25
Limit Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
index 07dab14..62d6071 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
@@ -72,7 +72,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 10.0D) and (UDFToDouble(ctimestamp2) <> 16.0D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -81,7 +81,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val 10.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val 16.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), SelectColumnIsNotNull(col 7:string)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0), SelectColumnIsNotNull(col 10:boolean)))
predicate: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 10.0D) and (UDFToDouble(ctimestamp2) <> 16.0D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15D - cdouble) (type: double), (cdouble * -257.0D) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15D - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
@@ -90,7 +90,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [8, 5, 10, 6, 4, 15, 16, 17, 19, 22, 23, 24, 25, 29]
selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 17:double, DoubleColAddDoubleColumn(col 18:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 18:float) -> 19:float, DoubleColAddDoubleColumn(col 20:double, col 21:double)(children: DoubleColUnaryMinus(col 5:double) -> 20:double, CastLongToDouble(col 3:bigint) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 5:double) -> 23:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 24:float, DoubleColUnaryMinus(col 4:float) -> 25:float, DoubleColAddDoubleColumn(col 26:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 26:double, DoubleColAddDoubleColumn(col 27:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 27:float) -> 28:float) -> 29:double
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double)
null sort order: zzzzzzzzzzzzzz
@@ -99,7 +99,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -127,7 +127,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13]
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
Limit Vectorization:
@@ -297,7 +297,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 12.503D) and (UDFToDouble(ctimestamp2) <> 11.998D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -306,7 +306,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprOrExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val 12.503)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val 11.998)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), SelectColumnIsNotNull(col 7:string)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0), SelectColumnIsNotNull(col 10:boolean)))
predicate: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 12.503D) and (UDFToDouble(ctimestamp2) <> 11.998D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15D - cdouble) (type: double), (cdouble * -257.0D) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15D - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
@@ -315,7 +315,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [8, 5, 10, 6, 4, 15, 16, 17, 19, 22, 23, 24, 25, 29]
selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 17:double, DoubleColAddDoubleColumn(col 18:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 18:float) -> 19:float, DoubleColAddDoubleColumn(col 20:double, col 21:double)(children: DoubleColUnaryMinus(col 5:double) -> 20:double, CastLongToDouble(col 3:bigint) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 5:double) -> 23:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 24:float, DoubleColUnaryMinus(col 4:float) -> 25:float, DoubleColAddDoubleColumn(col 26:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 26:double, DoubleColAddDoubleColumn(col 27:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 27:float) -> 28:float) -> 29:double
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double)
null sort order: zzzzzzzzzzzzzz
@@ -324,7 +324,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -352,7 +352,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13]
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
Limit Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
index 6658e64..cc5275d 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
@@ -62,7 +62,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -71,7 +71,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -1.389), FilterStringGroupColLessStringScalar(col 6:string, val a)))
predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean)
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3
@@ -80,7 +80,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [6, 5, 8, 13]
selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1)
Group By Vectorization:
@@ -95,7 +95,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp)
null sort order: aaa
@@ -105,7 +105,7 @@ STAGE PLANS:
className: VectorReduceSinkMultiKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double)
Execution mode: vectorized
Map Vectorization:
@@ -139,7 +139,7 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 2048 Data size: 98958 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 98905 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
@@ -148,13 +148,13 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 17, 26, 36, 6, 37, 39, 47]
selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 16:double)(children: DoubleColDivideLongColumn(col 12:double, col 15:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 11:double) -> 12:double, IfExprNullCondExpr(col 13:boolean, null, col 14:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 14:bigint) -> 15:bigint) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 25:double)(children: FuncPowerDoubleToDouble(col 24:double)(children: DoubleColDivideLongColumn(col 20:double, col 23:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 18:double) -> 19:double) -> 20:double, IfExprNullCondExpr(col 21:boolean, null, col 22:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 21:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 22:bigint) -> 23:bigint) -> 24:double) -> 25:double) -> 26:double, DoubleColMultiplyDoubleColumn(col 34:double, col 35:double)(children: FuncPowerDoubleToDouble(col 33:double)(children: DoubleColDivideLongColumn(col 29:double, col 32:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 27:double) -> 28:double) -> 29:double, IfExprNullCondExpr(col 30:boolean, null, col 31:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 30:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 31:bigint) -> 32:bigint) -> 33:double) -> 34:double, CastLongToDouble(col 3:bigint) -> 35:double) -> 36:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 37:double, DecimalColDivideDecimalScalar(col 38:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 38:decimal(19,0)) -> 39:decimal(28,6), FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 42:double, col 45:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 41:double)(children: DoubleColDivideLongColumn(col 40:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 40:double) -> 41:double) -> 42:double, IfExprNullCondExpr(col 43:boolean, null, col 44:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 43:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 44:bigint) -> 45:bigint) -> 46:double) -> 47:double
- Statistics: Num rows: 2048 Data size: 98958 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 98905 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 2048 Data size: 98958 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2048 Data size: 98905 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out
index 2ba16e2..615eca9 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_decimal_date.q.out
@@ -38,7 +38,7 @@ STAGE PLANS:
TableScan
alias: date_decimal_test_parquet
filterExpr: (cint is not null and cdouble is not null) (type: boolean)
- Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 176484 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -47,7 +47,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:double))
predicate: (cint is not null and cdouble is not null) (type: boolean)
- Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 176484 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cdate (type: date), cdecimal (type: decimal(20,10))
outputColumnNames: _col0, _col1
@@ -55,7 +55,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [2, 3]
- Statistics: Num rows: 12288 Data size: 176614 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 176484 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Limit Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
index 98637c0..4e43490 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_div0.q.out
@@ -25,7 +25,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -36,7 +36,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [13]
selectExpressions: DoubleColDivideDoubleScalar(col 5:double, val 0.0) -> 13:double
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
Limit Vectorization:
@@ -210,7 +210,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cbigint > 0L) and (cbigint < 100000000L)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -219,7 +219,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val 0), FilterLongColLessLongScalar(col 3:bigint, val 100000000))
predicate: ((cbigint > 0L) and (cbigint < 100000000L)) (type: boolean)
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (cbigint - 988888L) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888L))) (type: double), (1.2 / CAST( (cbigint - 988888L) AS decimal(19,0))) (type: decimal(22,21))
outputColumnNames: _col0, _col1, _col2
@@ -228,7 +228,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [13, 16, 19]
selectExpressions: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 13:bigint, DoubleColDivideDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 14:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 14:bigint) -> 15:double) -> 16:double, DecimalScalarDivideDecimalColumn(val 1.2, col 18:decimal(19,0))(children: CastLongToDecimal(col 17:bigint)(children: LongColSubtractLongScalar(col 3:bigint, val 988888) -> 17:bigint) -> 18:decimal(19,0)) -> 19:decimal(22,21)
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: bigint), _col1 (type: double)
null sort order: zz
@@ -237,7 +237,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col2 (type: decimal(22,21))
Execution mode: vectorized
@@ -266,7 +266,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
Limit Vectorization:
@@ -432,7 +432,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cdouble >= -500.0D) and (cdouble < -199.0D)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -441,7 +441,7 @@ STAGE PLANS:
native: true
predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -500.0), FilterDoubleColLessDoubleScalar(col 5:double, val -199.0))
predicate: ((cdouble >= -500.0D) and (cdouble < -199.0D)) (type: boolean)
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (cdouble + 200.0D) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0D)) (type: double), ((cdouble + 200.0D) / (cdouble + 200.0D)) (type: double), (3.0D / (cdouble + 200.0D)) (type: double), (1.2D / (cdouble + 200.0D)) (type: double)
outputColumnNames: _col0, _col1, _col2, _col4, _col5
@@ -450,7 +450,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [13, 16, 19, 21, 23]
selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 13:double, DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 15:double) -> 16:double, DoubleColDivideDoubleColumn(col 17:double, col 18:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 17:double, DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 18:double) -> 19:double, DoubleScalarDivideDoubleColumn(val 3.0, col 20:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 20:double) -> 21:double, DoubleScalarDivideDoubleColumn(val 1.2, col 22:double)(children: DoubleColAddDoubleScalar(col 5:double, val 200.0) -> 22:double) -> 23:double
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double), _col1 (type: double)
null sort order: zz
@@ -459,7 +459,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double)
Execution mode: vectorized
@@ -488,7 +488,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2, 1, 3, 4]
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 100
Limit Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
index 7f03cba..e730d5d 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
@@ -25,14 +25,14 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cint > 0) and (UDFToDouble(cbigint) < cdouble)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((cint > 0) and (UDFToDouble(cbigint) < cdouble)) (type: boolean)
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), cdouble (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 7
Statistics: Num rows: 7 Data size: 336 Basic stats: COMPLETE Column stats: NONE
@@ -106,7 +106,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -115,7 +115,7 @@ STAGE PLANS:
native: true
predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
predicate: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
outputColumnNames: _col0, _col1, _col2
@@ -123,7 +123,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 5, 1]
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: double)
null sort order: zz
@@ -132,7 +132,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
value expressions: _col2 (type: smallint)
Execution mode: vectorized
@@ -161,7 +161,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
Limit Vectorization:
@@ -242,7 +242,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -253,7 +253,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 13]
selectExpressions: DoubleColAddDoubleScalar(col 5:double, val 1.0) -> 13:double
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col1), count(_col1)
Group By Vectorization:
@@ -268,7 +268,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
null sort order: z
@@ -278,7 +278,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
value expressions: _col1 (type: double), _col2 (type: bigint)
Execution mode: vectorized
@@ -313,7 +313,7 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: tinyint), (_col1 / _col2) (type: double)
outputColumnNames: _col0, _col1
@@ -322,7 +322,7 @@ STAGE PLANS:
native: true
projectedOutputColumnNums: [0, 3]
selectExpressions: DoubleColDivideLongColumn(col 1:double, col 2:bigint) -> 3:double
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
Limit Vectorization:
@@ -403,7 +403,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -413,7 +413,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
@@ -426,7 +426,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
null sort order: a
@@ -436,7 +436,7 @@ STAGE PLANS:
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
Execution mode: vectorized
Map Vectorization:
@@ -468,7 +468,7 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
Limit Vectorization:
@@ -549,7 +549,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Select Operator
@@ -559,7 +559,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 5]
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
Group By Vectorization:
className: VectorGroupByOperator
@@ -572,7 +572,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: double)
null sort order: za
@@ -582,7 +582,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -613,7 +613,7 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count(_col1)
Group By Vectorization:
@@ -627,7 +627,7 @@ STAGE PLANS:
keys: _col0 (type: tinyint)
mode: complete
outputColumnNames: _col0, _col1
- Statistics: Num rows: 3072 Data size: 148437 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3072 Data size: 148358 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
Limit Vectorization:
@@ -742,7 +742,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -751,7 +751,7 @@ STAGE PLANS:
native: true
predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
predicate: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(ctinyint)
Group By Vectorization:
@@ -766,7 +766,7 @@ STAGE PLANS:
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double)
null sort order: a
@@ -776,7 +776,7 @@ STAGE PLANS:
className: VectorReduceSinkMultiKeyOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized
Map Vectorization:
@@ -810,7 +810,7 @@ STAGE PLANS:
keys: KEY._col0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: bigint), _col0 (type: double)
null sort order: zz
@@ -819,7 +819,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.3
Reducer 3
Execution mode: vectorized
@@ -837,7 +837,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [1, 0]
- Statistics: Num rows: 6144 Data size: 296875 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 296716 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 20
Limit Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
index 804627e..d808811 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_offset_limit.q.out
@@ -25,14 +25,14 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ((cint > 0) and (UDFToDouble(cbigint) < cdouble)) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: ((cint > 0) and (UDFToDouble(cbigint) < cdouble)) (type: boolean)
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint), cdouble (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1365 Data size: 65956 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1365 Data size: 65920 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 2
Offset of rows: 3
@@ -102,7 +102,7 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TableScan Vectorization:
native: true
Filter Operator
@@ -111,7 +111,7 @@ STAGE PLANS:
native: true
predicateExpression: SelectColumnIsNotNull(col 0:tinyint)
predicate: ctinyint is not null (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cdouble (type: double), csmallint (type: smallint)
outputColumnNames: _col0, _col1, _col2
@@ -119,7 +119,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 5, 1]
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint), _col1 (type: double)
null sort order: zz
@@ -128,7 +128,7 @@ STAGE PLANS:
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
value expressions: _col2 (type: smallint)
Execution mode: vectorized
@@ -157,7 +157,7 @@ STAGE PLANS:
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1, 2]
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 3
Limit Vectorization:
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out
index 146ce04..35d37b8 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_part_project.q.out
@@ -77,16 +77,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesparquet_part_n0
- Statistics: Num rows: 200 Data size: 9800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 9196 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: (cdouble + 2.0D) (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 200 Data size: 9800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 9196 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double)
null sort order: z
sort order: +
- Statistics: Num rows: 200 Data size: 9800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 9196 Basic stats: COMPLETE Column stats: NONE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized
Map Vectorization:
@@ -110,13 +110,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 200 Data size: 9800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 9196 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 490 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 450 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 490 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 450 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out
index 333c687..9c4cec7 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_pushdown.q.out
@@ -27,14 +27,14 @@ STAGE PLANS:
TableScan
alias: alltypesparquet
filterExpr: (UDFToDouble(cbigint) < cdouble) (type: boolean)
- Statistics: Num rows: 12288 Data size: 593751 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 593433 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (UDFToDouble(cbigint) < cdouble) (type: boolean)
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cbigint (type: bigint)
outputColumnNames: cbigint
- Statistics: Num rows: 4096 Data size: 197917 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4096 Data size: 197811 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(cbigint), count(cbigint)
minReductionHashAggr: 0.99
diff --git ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
index 9677116..4d2e047 100644
--- ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
+++ ql/src/test/results/clientpositive/spark/vectorization_input_format_excludes.q.out
@@ -78,14 +78,14 @@ STAGE PLANS:
TableScan
alias: alltypes_parquet_n0
filterExpr: (cint = 528534767) (type: boolean)
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (cint = 528534767) (type: boolean)
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
@@ -178,24 +178,24 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypes_parquet_n0
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5)
keys: _col0 (type: tinyint)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint)
Execution mode: vectorized
Map Vectorization:
@@ -221,14 +221,14 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -422,14 +422,14 @@ STAGE PLANS:
TableScan
alias: alltypes_parquet_n0
filterExpr: (cint = 528534767) (type: boolean)
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (cint = 528534767) (type: boolean)
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
@@ -516,24 +516,24 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypes_parquet_n0
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5)
keys: _col0 (type: tinyint)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint)
Map Vectorization:
enabled: false
@@ -553,14 +553,14 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -754,14 +754,14 @@ STAGE PLANS:
TableScan
alias: alltypes_parquet_n0
filterExpr: (cint = 528534767) (type: boolean)
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (cint = 528534767) (type: boolean)
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: 528534767 (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE
@@ -854,24 +854,24 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypes_parquet_n0
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), (cdouble * cdouble) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: max(_col1), min(_col2), count(_col3), sum(_col4), count(_col4), sum(_col6), sum(_col5), count(_col5)
keys: _col0 (type: tinyint)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: tinyint)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 12288 Data size: 256791 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 256593 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: double), _col8 (type: bigint)
Execution mode: vectorized
Map Vectorization:
@@ -897,14 +897,14 @@ STAGE PLANS:
keys: KEY._col0 (type: tinyint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), (_col4 / _col5) (type: double), power(((_col6 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 6144 Data size: 128395 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6144 Data size: 128296 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1573,17 +1573,17 @@ STAGE PLANS:
TableScan
alias: parquettbl
filterExpr: (UDFToInteger((t1 + t2)) > 10) (type: boolean)
- Statistics: Num rows: 3 Data size: 126 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 70 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (UDFToInteger((t1 + t2)) > 10) (type: boolean)
- Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 23 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: t1 (type: tinyint), t2 (type: tinyint), (t1 + t2) (type: tinyint)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 23 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 42 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 23 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out
index 7808f85..2dd3af6 100644
--- ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out
+++ ql/src/test/results/clientpositive/spark/vectorization_parquet_projection.q.out
@@ -139,14 +139,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: parquet_project_test
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int), m1 (type: map)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -221,9 +221,9 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: parquet_project_test
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
minReductionHashAggr: 0.99
@@ -311,24 +311,24 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: parquet_project_test
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: cint (type: int)
outputColumnNames: cint
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: cint (type: int)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: int)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized
Map Vectorization:
@@ -354,10 +354,10 @@ STAGE PLANS:
keys: KEY._col0 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 222 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 222 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -411,24 +411,24 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: parquet_project_test
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: m1['color'] (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: _col0 (type: string)
minReductionHashAggr: 0.99
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: a
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Map Vectorization:
enabled: true
@@ -450,10 +450,10 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 222 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 11 Data size: 354 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 222 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -530,9 +530,9 @@ STAGE PLANS:
TableScan
alias: parquet_nullsplit
filterExpr: (len = '1') (type: boolean)
- Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE
Select Operator
- Statistics: Num rows: 1 Data size: 108 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 60 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
minReductionHashAggr: 0.99
diff --git ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out
index ca211b3..cb2e1cc 100644
--- ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out
+++ ql/src/test/results/clientpositive/vectorization_parquet_projection.q.out
@@ -213,9 +213,9 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: parquet_project_test
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 22 Data size: 709 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 22 Data size: 445 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
minReductionHashAggr: 0.99
@@ -495,9 +495,9 @@ STAGE PLANS:
TableScan
alias: parquet_nullsplit
filterExpr: (len = '1') (type: boolean)
- Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 68 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
minReductionHashAggr: 0.99