diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java index c7aa93ec00..f02a300e1d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java @@ -23,7 +23,10 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; @@ -53,7 +56,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -106,7 +108,7 @@ * These members have information for data type conversion. * Not defined if there is no conversion. */ - PrimitiveObjectInspector[] convertSourcePrimitiveObjectInspectors; + ObjectInspector[] convertSourceOI; // The primitive object inspector of the source data type for any column being // converted. Otherwise, null. @@ -128,7 +130,7 @@ private void allocateArrays(int count) { * Allocate the source conversion related arrays (optional). */ private void allocateConvertArrays(int count) { - convertSourcePrimitiveObjectInspectors = new PrimitiveObjectInspector[count]; + convertSourceOI = new ObjectInspector[count]; convertTargetWritables = new Writable[count]; } @@ -163,12 +165,10 @@ private void initTargetEntry(int logicalColumnIndex, int projectionColumnNum, Ty private void initConvertSourceEntry(int logicalColumnIndex, TypeInfo convertSourceTypeInfo) { isConvert[logicalColumnIndex] = true; final Category convertSourceCategory = convertSourceTypeInfo.getCategory(); - if (convertSourceCategory == Category.PRIMITIVE) { - final PrimitiveTypeInfo convertSourcePrimitiveTypeInfo = (PrimitiveTypeInfo) convertSourceTypeInfo; - convertSourcePrimitiveObjectInspectors[logicalColumnIndex] = - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - convertSourcePrimitiveTypeInfo); + convertSourceOI[logicalColumnIndex] = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(convertSourceTypeInfo); + if (convertSourceCategory == Category.PRIMITIVE) { // These need to be based on the target. 
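
The hunk above replaces the primitive-only source object inspector array with a general ObjectInspector array built from the source TypeInfo, so conversion is no longer limited to primitive source columns. A standalone sketch (not part of the patch; the type strings are illustrative) of how a standard writable object inspector is derived from a TypeInfo for primitive and complex types alike:

    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
    import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

    public class WritableOISketch {
      public static void main(String[] args) {
        for (String typeString : new String[] {"int", "array<double>", "struct<a:int,b:string>"}) {
          TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeString);
          // The same factory call now backs convertSourceOI[] for every category, not just PRIMITIVE.
          ObjectInspector oi =
              TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
          System.out.println(typeString + " -> " + oi.getCategory());
        }
      }
    }
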
final PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) targetTypeInfos[logicalColumnIndex]).getPrimitiveCategory(); @@ -298,36 +298,29 @@ public int initConversion(TypeInfo[] sourceTypeInfos, TypeInfo[] targetTypeInfos } else { final TypeInfo targetTypeInfo = targetTypeInfos[i]; + final TypeInfo sourceTypeInfo = sourceTypeInfos[i]; - if (targetTypeInfo.getCategory() != ObjectInspector.Category.PRIMITIVE) { + if (!sourceTypeInfo.equals(targetTypeInfo)) { - // For now, we don't have an assigner for complex types... - - } else { - final TypeInfo sourceTypeInfo = sourceTypeInfos[i]; + if (VectorPartitionConversion.isImplicitVectorColumnConversion( + sourceTypeInfo, targetTypeInfo)) { - if (!sourceTypeInfo.equals(targetTypeInfo)) { + // Do implicit conversion accepting the source type and putting it in the same + // target type ColumnVector type. + initTargetEntry(i, i, sourceTypeInfo); - if (VectorPartitionConversion.isImplicitVectorColumnConversion( - sourceTypeInfo, targetTypeInfo)) { - - // Do implicit conversion accepting the source type and putting it in the same - // target type ColumnVector type. - initTargetEntry(i, i, sourceTypeInfo); - - } else { - - // Do formal conversion... - initTargetEntry(i, i, targetTypeInfo); - initConvertSourceEntry(i, sourceTypeInfo); - - } } else { - // No conversion. + // Do formal conversion... initTargetEntry(i, i, targetTypeInfo); + initConvertSourceEntry(i, sourceTypeInfo); } + } else { + + // No conversion. + initTargetEntry(i, i, targetTypeInfo); + } } } @@ -594,18 +587,18 @@ private void assignRowColumn( case STRUCT: { final StructColumnVector structColumnVector = (StructColumnVector) columnVector; - final StructTypeInfo structTypeInfo = (StructTypeInfo) targetTypeInfo; - final List fieldStructTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); - final int size = fieldStructTypeInfos.size(); + final StructTypeInfo targetStructTypeInfo = (StructTypeInfo) targetTypeInfo; + final List targetFieldTypeInfos = targetStructTypeInfo.getAllStructFieldTypeInfos(); + final int size = targetFieldTypeInfos.size(); if (object instanceof List) { final List struct = (List) object; for (int i = 0; i < size; i++) { - assignRowColumn(structColumnVector.fields[i], batchIndex, fieldStructTypeInfos.get(i), struct.get(i)); + assignRowColumn(structColumnVector.fields[i], batchIndex, targetFieldTypeInfos.get(i), struct.get(i)); } } else { final Object[] array = (Object[]) object; for (int i = 0; i < size; i++) { - assignRowColumn(structColumnVector.fields[i], batchIndex, fieldStructTypeInfos.get(i), array[i]); + assignRowColumn(structColumnVector.fields[i], batchIndex, targetFieldTypeInfos.get(i), array[i]); } } } @@ -647,113 +640,134 @@ private void assignRowColumn( public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex, Object object) { Preconditions.checkState(isConvert[logicalColumnIndex]); - final Category targetCategory = targetTypeInfos[logicalColumnIndex].getCategory(); + final int projectionColumnNum = projectionColumnNums[logicalColumnIndex]; + assignConvertRowColumn( + batch.cols[projectionColumnNum], + batchIndex, + targetTypeInfos[logicalColumnIndex], + convertSourceOI[logicalColumnIndex], + convertTargetWritables[logicalColumnIndex], + object); + } + + private void assignConvertRowColumn(ColumnVector columnVector, int batchIndex, + TypeInfo targetTypeInfo, ObjectInspector sourceObjectInspector, + Writable convertTargetWritable, Object object) { + + final Category targetCategory = 
targetTypeInfo.getCategory(); if (targetCategory == null) { /* * This is a column that we don't want (i.e. not included) -- we are done. */ return; } - final int projectionColumnNum = projectionColumnNums[logicalColumnIndex]; if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } try { switch (targetCategory) { case PRIMITIVE: + final PrimitiveObjectInspector sourcePrimitiveOI = + (PrimitiveObjectInspector) sourceObjectInspector; final PrimitiveCategory targetPrimitiveCategory = - ((PrimitiveTypeInfo) targetTypeInfos[logicalColumnIndex]).getPrimitiveCategory(); + ((PrimitiveTypeInfo) targetTypeInfo).getPrimitiveCategory(); switch (targetPrimitiveCategory) { case VOID: - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; case BOOLEAN: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = (PrimitiveObjectInspectorUtils.getBoolean( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]) ? 1 : 0); + object, sourcePrimitiveOI) ? 1 : 0); break; case BYTE: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getByte( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); break; case SHORT: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getShort( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); break; case INT: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getInt( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); break; case LONG: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getLong( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); break; case TIMESTAMP: { final Timestamp timestamp = PrimitiveObjectInspectorUtils.getTimestamp( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (timestamp == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((TimestampColumnVector) batch.cols[projectionColumnNum]).set( + ((TimestampColumnVector) columnVector).set( batchIndex, timestamp); } break; case DATE: { final Date date = PrimitiveObjectInspectorUtils.getDate( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (date == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - final DateWritable dateWritable = (DateWritable) convertTargetWritables[logicalColumnIndex]; + DateWritable dateWritable = (DateWritable) convertTargetWritable; + if (dateWritable == null) { + 
dateWritable = new DateWritable(); + } dateWritable.set(date); - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = dateWritable.getDays(); } break; case FLOAT: - ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((DoubleColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getFloat( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); break; case DOUBLE: - ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((DoubleColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getDouble( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); break; case BINARY: { final BytesWritable bytesWritable = PrimitiveObjectInspectorUtils.getBinary( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (bytesWritable == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + ((BytesColumnVector) columnVector).setVal( batchIndex, bytesWritable.getBytes(), 0, bytesWritable.getLength()); } break; case STRING: { final String string = PrimitiveObjectInspectorUtils.getString( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (string == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - Text text = (Text) convertTargetWritables[logicalColumnIndex]; + Text text = (Text) convertTargetWritable; + if (text == null) { + text = new Text(); + } text.set(string); - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + ((BytesColumnVector) columnVector).setVal( batchIndex, text.getBytes(), 0, text.getLength()); } break; @@ -763,16 +777,16 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, final HiveVarchar hiveVarchar = PrimitiveObjectInspectorUtils.getHiveVarchar( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (hiveVarchar == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } // TODO: Do we need maxLength checking? byte[] bytes = hiveVarchar.getValue().getBytes(); - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + ((BytesColumnVector) columnVector).setVal( batchIndex, bytes, 0, bytes.length); } break; @@ -782,9 +796,9 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, final HiveChar hiveChar = PrimitiveObjectInspectorUtils.getHiveChar( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (hiveChar == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } // We store CHAR in vector row batch with padding stripped. @@ -792,7 +806,7 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, // TODO: Do we need maxLength checking? 
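
The DATE and STRING branches above allocate their scratch writable on demand because the recursive complex-type conversions added later in this method pass null for convertTargetWritable. A minimal sketch of that allocate-if-null pattern in isolation (the helper name is made up for illustration):

    import org.apache.hadoop.io.Text;

    public class LazyScratchWritableSketch {
      // 'scratch' may be null on the first call (as on the recursive conversion path);
      // allocate it lazily and keep reusing the same instance afterwards.
      static Text setString(Text scratch, String value) {
        if (scratch == null) {
          scratch = new Text();
        }
        scratch.set(value);
        return scratch;
      }

      public static void main(String[] args) {
        Text t = setString(null, "first");   // allocates a new Text
        t = setString(t, "second");          // reuses it
        System.out.println(t);
      }
    }
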
final byte[] bytes = hiveChar.getStrippedValue().getBytes(); - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + ((BytesColumnVector) columnVector).setVal( batchIndex, bytes, 0, bytes.length); } break; @@ -800,12 +814,12 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, { final HiveDecimal hiveDecimal = PrimitiveObjectInspectorUtils.getHiveDecimal( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (hiveDecimal == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((DecimalColumnVector) batch.cols[projectionColumnNum]).set( + ((DecimalColumnVector) columnVector).set( batchIndex, hiveDecimal); } break; @@ -813,12 +827,12 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, { final HiveIntervalYearMonth intervalYearMonth = PrimitiveObjectInspectorUtils.getHiveIntervalYearMonth( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (intervalYearMonth == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = intervalYearMonth.getTotalMonths(); } break; @@ -826,12 +840,12 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, { final HiveIntervalDayTime intervalDayTime = PrimitiveObjectInspectorUtils.getHiveIntervalDayTime( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (intervalDayTime == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((IntervalDayTimeColumnVector) batch.cols[projectionColumnNum]).set( + ((IntervalDayTimeColumnVector) columnVector).set( batchIndex, intervalDayTime); } break; @@ -840,18 +854,113 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, " not supported"); } break; + case LIST: + { + final ListColumnVector listColumnVector = (ListColumnVector) columnVector; + final ListObjectInspector sourceListOI = (ListObjectInspector) sourceObjectInspector; + final ObjectInspector sourceElementOI = sourceListOI.getListElementObjectInspector(); + final int size = sourceListOI.getListLength(object); + final TypeInfo targetElementTypeInfo = ((ListTypeInfo) targetTypeInfo).getListElementTypeInfo(); + + listColumnVector.offsets[batchIndex] = listColumnVector.childCount; + listColumnVector.childCount += size; + listColumnVector.ensureSize(listColumnVector.childCount, true); + listColumnVector.lengths[batchIndex] = size; + + for (int i = 0; i < size; i++) { + final Object element = sourceListOI.getListElement(object, i); + final int offset = (int) (listColumnVector.offsets[batchIndex] + i); + assignConvertRowColumn( + listColumnVector.child, + offset, + targetElementTypeInfo, + sourceElementOI, + null, + element); + } + } + break; + case MAP: + { + final MapColumnVector mapColumnVector = (MapColumnVector) columnVector; + final MapObjectInspector mapObjectInspector = (MapObjectInspector) sourceObjectInspector; + final MapTypeInfo mapTypeInfo = (MapTypeInfo) targetTypeInfo; + + final Map map = 
mapObjectInspector.getMap(object); + for (Map.Entry entry : map.entrySet()) { + assignConvertRowColumn( + mapColumnVector.keys, + batchIndex, + mapTypeInfo.getMapKeyTypeInfo(), + mapObjectInspector.getMapKeyObjectInspector(), + null, + entry.getKey()); + assignConvertRowColumn( + mapColumnVector.values, + batchIndex, + mapTypeInfo.getMapValueTypeInfo(), + mapObjectInspector.getMapValueObjectInspector(), + null, + entry.getValue()); + } + } + break; + case STRUCT: + { + final StructColumnVector structColumnVector = (StructColumnVector) columnVector; + final StructObjectInspector sourceStructOI = (StructObjectInspector) sourceObjectInspector; + final List sourceFields = sourceStructOI.getAllStructFieldRefs(); + final StructTypeInfo targetStructTypeInfo = (StructTypeInfo) targetTypeInfo; + final List targetTypeInfos = targetStructTypeInfo.getAllStructFieldTypeInfos(); + final int size = targetTypeInfos.size(); + + for (int i = 0; i < size; i++) { + if (i < sourceFields.size()) { + final StructField sourceStructField = sourceFields.get(i); + final ObjectInspector sourceFieldOI = sourceStructField.getFieldObjectInspector(); + final Object sourceData = sourceStructOI.getStructFieldData(object, sourceStructField); + assignConvertRowColumn( + structColumnVector.fields[i], + batchIndex, + targetTypeInfos.get(i), + sourceFieldOI, + null, + sourceData); + } else { + final ColumnVector fieldColumnVector = structColumnVector.fields[i]; + VectorizedBatchUtil.setNullColIsNullValue(fieldColumnVector, batchIndex); + } + } + } + break; + case UNION: + { + final UnionColumnVector unionColumnVector = (UnionColumnVector) columnVector; + final UnionObjectInspector unionObjectInspector = (UnionObjectInspector) sourceObjectInspector; + final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) targetTypeInfo; + final int tag = unionObjectInspector.getTag(object); + + assignConvertRowColumn( + unionColumnVector.fields[tag], + batchIndex, + unionTypeInfo.getAllUnionObjectTypeInfos().get(tag), + unionObjectInspector.getObjectInspectors().get(tag), + null, + unionObjectInspector.getField(tag)); + } + break; default: throw new RuntimeException("Category " + targetCategory.name() + " not supported"); } } catch (NumberFormatException e) { // Some of the conversion methods throw this exception on numeric parsing errors. - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } // We always set the null flag to false when there is a value. 
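
The new LIST branch above grows the shared child vector and records each row's slice through offsets, lengths and childCount before converting the elements one by one. A standalone sketch of that ColumnVector layout (sizes and values are made up; it assumes the ListColumnVector(int, ColumnVector) constructor):

    import org.apache.hadoop.hive.ql.exec.vector.ListColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;

    public class ListVectorLayoutSketch {
      public static void main(String[] args) {
        // Two rows: row 0 = [10, 20], row 1 = [30]. The list vector stores only
        // offsets/lengths into a shared child vector, which is what the LIST
        // branch maintains per batchIndex.
        LongColumnVector child = new LongColumnVector(1024);
        ListColumnVector list = new ListColumnVector(2, child);

        list.offsets[0] = list.childCount;
        list.lengths[0] = 2;
        child.vector[0] = 10;
        child.vector[1] = 20;
        list.childCount += 2;

        list.offsets[1] = list.childCount;
        list.lengths[1] = 1;
        child.vector[2] = 30;
        list.childCount += 1;

        System.out.println("row 1 starts at child index " + list.offsets[1]
            + " and has " + list.lengths[1] + " element(s)");
      }
    }
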
- batch.cols[projectionColumnNum].isNull[batchIndex] = false; + columnVector.isNull[batchIndex] = false; } /* diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java index e37816f0c4..2ad06fc128 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java @@ -40,7 +40,16 @@ import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.SettableListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableUnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; @@ -55,7 +64,6 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; import com.google.common.base.Charsets; import com.google.common.base.Preconditions; @@ -112,7 +120,7 @@ private VectorDeserializeRow() { * This member has information for data type conversion. * Not defined if there is no conversion. */ - Writable conversionWritable; + private Object conversionWritable; // Conversion requires source be placed in writable so we can call upon // VectorAssignRow to convert and assign the row column. @@ -120,6 +128,8 @@ private VectorDeserializeRow() { // For a complex type, a helper object that describes elements, key/value pairs, // or fields. + private ObjectInspector objectInspector; + public Field(PrimitiveCategory primitiveCategory, int maxLength) { this.category = Category.PRIMITIVE; this.primitiveCategory = primitiveCategory; @@ -127,10 +137,13 @@ public Field(PrimitiveCategory primitiveCategory, int maxLength) { this.isConvert = false; this.conversionWritable = null; this.complexTypeHelper = null; + this.objectInspector = PrimitiveObjectInspectorFactory. 
+ getPrimitiveWritableObjectInspector(primitiveCategory); } - public Field(Category category, ComplexTypeHelper complexTypeHelper) { + public Field(Category category, ComplexTypeHelper complexTypeHelper, TypeInfo typeInfo) { this.category = category; + this.objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo); this.primitiveCategory = null; this.maxLength = 0; this.isConvert = false; @@ -158,17 +171,21 @@ public boolean getIsConvert() { return isConvert; } - public void setConversionWritable(Writable conversionWritable) { + public void setConversionWritable(Object conversionWritable) { this.conversionWritable = conversionWritable; } - public Writable getConversionWritable() { + public Object getConversionWritable() { return conversionWritable; } public ComplexTypeHelper getComplexHelper() { return complexTypeHelper; } + + public ObjectInspector getObjectInspector() { + return objectInspector; + } } /* @@ -191,7 +208,7 @@ public ComplexTypeHelper getComplexHelper() { private Field topLevelFields[]; - VectorAssignRow convertVectorAssignRow; + private VectorAssignRow convertVectorAssignRow; // Use its conversion ability. /* @@ -231,7 +248,7 @@ private Field allocateComplexField(TypeInfo sourceTypeInfo) { final ListComplexTypeHelper listHelper = new ListComplexTypeHelper( allocateField(listTypeInfo.getListElementTypeInfo())); - return new Field(category, listHelper); + return new Field(category, listHelper, sourceTypeInfo); } case MAP: { @@ -240,7 +257,7 @@ private Field allocateComplexField(TypeInfo sourceTypeInfo) { new MapComplexTypeHelper( allocateField(mapTypeInfo.getMapKeyTypeInfo()), allocateField(mapTypeInfo.getMapValueTypeInfo())); - return new Field(category, mapHelper); + return new Field(category, mapHelper, sourceTypeInfo); } case STRUCT: { @@ -253,7 +270,7 @@ private Field allocateComplexField(TypeInfo sourceTypeInfo) { } final StructComplexTypeHelper structHelper = new StructComplexTypeHelper(fields); - return new Field(category, structHelper); + return new Field(category, structHelper, sourceTypeInfo); } case UNION: { @@ -266,7 +283,7 @@ private Field allocateComplexField(TypeInfo sourceTypeInfo) { } final UnionComplexTypeHelper unionHelper = new UnionComplexTypeHelper(fields); - return new Field(category, unionHelper); + return new Field(category, unionHelper, sourceTypeInfo); } default: throw new RuntimeException("Category " + category + " not supported"); @@ -711,8 +728,7 @@ private void storeComplexFieldRowColumn(ColumnVector fieldColVector, Field field, int batchIndex, boolean canRetainByteRef) throws IOException { if (!deserializeRead.readComplexField()) { - fieldColVector.isNull[batchIndex] = true; - fieldColVector.noNulls = false; + VectorizedBatchUtil.setNullColIsNullValue(fieldColVector, batchIndex); return; } @@ -735,6 +751,8 @@ private void storeComplexFieldRowColumn(ColumnVector fieldColVector, default: throw new RuntimeException("Category " + field.getCategory() + " not supported"); } + + fieldColVector.isNull[batchIndex] = false; } private void storeListRowColumn(ColumnVector colVector, @@ -867,7 +885,7 @@ private void storeRowColumn(VectorizedRowBatch batch, int batchIndex, Field field, int logicalColumnIndex, boolean canRetainByteRef) throws IOException { final int projectionColumnNum = projectionColumnNums[logicalColumnIndex]; - ColumnVector colVector = batch.cols[projectionColumnNum]; + final ColumnVector colVector = batch.cols[projectionColumnNum]; switch (field.getCategory()) { case PRIMITIVE: @@ -890,7 +908,7 @@ private 
void storeRowColumn(VectorizedRowBatch batch, int batchIndex, } // We always set the null flag to false when there is a value. - batch.cols[projectionColumnNum].isNull[batchIndex] = false; + colVector.isNull[batchIndex] = false; } /** @@ -907,156 +925,336 @@ private void storeRowColumn(VectorizedRowBatch batch, int batchIndex, private void convertRowColumn(VectorizedRowBatch batch, int batchIndex, Field field, int logicalColumnIndex) throws IOException { - Writable convertSourceWritable = field.getConversionWritable(); + final int projectionColumnIndex = projectionColumnNums[logicalColumnIndex]; + final ColumnVector colVector = batch.cols[projectionColumnIndex]; + + final Object convertSourceWritable; + switch (field.getCategory()) { + case PRIMITIVE: + convertSourceWritable = convertPrimitiveRowColumn(batchIndex, field); + break; + case LIST: + convertSourceWritable = convertListRowColumn(colVector, batchIndex, field); + break; + case MAP: + convertSourceWritable = convertMapRowColumn(colVector, batchIndex, field); + break; + case STRUCT: + convertSourceWritable = convertStructRowColumn(colVector, batchIndex, field); + break; + case UNION: + convertSourceWritable = convertUnionRowColumn(colVector, batchIndex, field); + break; + default: + throw new RuntimeException(); + } + + /* + * Convert our source object we just read into the target object and store that in the + * VectorizedRowBatch. + */ + convertVectorAssignRow.assignConvertRowColumn( + batch, batchIndex, logicalColumnIndex, convertSourceWritable); + } + + private Object convertComplexFieldRowColumn(ColumnVector colVector, int batchIndex, + Field field) throws IOException { + + if (!deserializeRead.readComplexField()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + return null; + } + + colVector.isNull[batchIndex] = false; + switch (field.getCategory()) { case PRIMITIVE: + return convertPrimitiveRowColumn(batchIndex, field); + case LIST: + return convertListRowColumn(colVector, batchIndex, field); + case MAP: + return convertMapRowColumn(colVector, batchIndex, field); + case STRUCT: + return convertStructRowColumn(colVector, batchIndex, field); + case UNION: + return convertUnionRowColumn(colVector, batchIndex, field); + default: + throw new RuntimeException(); + } + } + + private Object convertPrimitiveRowColumn(int batchIndex, Field field) throws IOException { + + Object writable = field.getConversionWritable(); + switch (field.getPrimitiveCategory()) { + case VOID: + writable = null; + break; + case BOOLEAN: { - switch (field.getPrimitiveCategory()) { - case VOID: - convertSourceWritable = null; - break; - case BOOLEAN: - ((BooleanWritable) convertSourceWritable).set(deserializeRead.currentBoolean); - break; - case BYTE: - ((ByteWritable) convertSourceWritable).set(deserializeRead.currentByte); - break; - case SHORT: - ((ShortWritable) convertSourceWritable).set(deserializeRead.currentShort); - break; - case INT: - ((IntWritable) convertSourceWritable).set(deserializeRead.currentInt); - break; - case LONG: - ((LongWritable) convertSourceWritable).set(deserializeRead.currentLong); - break; - case TIMESTAMP: - ((TimestampWritable) convertSourceWritable).set(deserializeRead.currentTimestampWritable); - break; - case DATE: - ((DateWritable) convertSourceWritable).set(deserializeRead.currentDateWritable); - break; - case FLOAT: - ((FloatWritable) convertSourceWritable).set(deserializeRead.currentFloat); - break; - case DOUBLE: - ((DoubleWritable) convertSourceWritable).set(deserializeRead.currentDouble); - 
break; - case BINARY: - if (deserializeRead.currentBytes == null) { - LOG.info( - "null binary entry: batchIndex " + batchIndex + " projection column num " + - projectionColumnNums[logicalColumnIndex]); - } + if (writable == null) { + writable = new BooleanWritable(); + } + ((BooleanWritable) writable).set(deserializeRead.currentBoolean); + } + break; + case BYTE: + { + if (writable == null) { + writable = new ByteWritable(); + } + ((ByteWritable) writable).set(deserializeRead.currentByte); + } + break; + case SHORT: + { + if (writable == null) { + writable = new ShortWritable(); + } + ((ShortWritable) writable).set(deserializeRead.currentShort); + } + break; + case INT: + { + if (writable == null) { + writable = new IntWritable(); + } + ((IntWritable) writable).set(deserializeRead.currentInt); + } + break; + case LONG: + { + if (writable == null) { + writable = new LongWritable(); + } + ((LongWritable) writable).set(deserializeRead.currentLong); + } + break; + case TIMESTAMP: + { + if (writable == null) { + writable = new TimestampWritable(); + } + ((TimestampWritable) writable).set(deserializeRead.currentTimestampWritable); + } + break; + case DATE: + { + if (writable == null) { + writable = new DateWritable(); + } + ((DateWritable) writable).set(deserializeRead.currentDateWritable); + } + break; + case FLOAT: + { + if (writable == null) { + writable = new FloatWritable(); + } + ((FloatWritable) writable).set(deserializeRead.currentFloat); + } + break; + case DOUBLE: + { + if (writable == null) { + writable = new DoubleWritable(); + } + ((DoubleWritable) writable).set(deserializeRead.currentDouble); + } + break; + case BINARY: + { + if (writable == null) { + writable = new BytesWritable(); + } + if (deserializeRead.currentBytes == null) { + LOG.info( + "null binary entry: batchIndex " + batchIndex); + } + + ((BytesWritable) writable).set( + deserializeRead.currentBytes, + deserializeRead.currentBytesStart, + deserializeRead.currentBytesLength); + break; + } + case STRING: + { + if (writable == null) { + writable = new Text(); + } + if (deserializeRead.currentBytes == null) { + throw new RuntimeException( + "null string entry: batchIndex " + batchIndex); + } - ((BytesWritable) convertSourceWritable).set( + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + ((Text) writable).set( + deserializeRead.currentBytes, + deserializeRead.currentBytesStart, + deserializeRead.currentBytesLength); + } + break; + case VARCHAR: + { + if (writable == null) { + writable = new HiveVarcharWritable(); + } + // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method + // that does not use Java String objects. 
+ if (deserializeRead.currentBytes == null) { + throw new RuntimeException( + "null varchar entry: batchIndex " + batchIndex); + } + + int adjustedLength = StringExpr.truncate( + deserializeRead.currentBytes, + deserializeRead.currentBytesStart, + deserializeRead.currentBytesLength, + field.getMaxLength()); + + ((HiveVarcharWritable) writable).set( + new String( deserializeRead.currentBytes, deserializeRead.currentBytesStart, - deserializeRead.currentBytesLength); - break; - case STRING: - if (deserializeRead.currentBytes == null) { - throw new RuntimeException( - "null string entry: batchIndex " + batchIndex + " projection column num " + - projectionColumnNums[logicalColumnIndex]); - } + adjustedLength, + Charsets.UTF_8), + -1); + } + break; + case CHAR: + { + if (writable == null) { + writable = new HiveCharWritable(); + } + // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method + // that does not use Java String objects. + if (deserializeRead.currentBytes == null) { + throw new RuntimeException( + "null char entry: batchIndex " + batchIndex); + } + + int adjustedLength = StringExpr.rightTrimAndTruncate( + deserializeRead.currentBytes, + deserializeRead.currentBytesStart, + deserializeRead.currentBytesLength, + field.getMaxLength()); - // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. - ((Text) convertSourceWritable).set( + ((HiveCharWritable) writable).set( + new String( deserializeRead.currentBytes, deserializeRead.currentBytesStart, - deserializeRead.currentBytesLength); - break; - case VARCHAR: - { - // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method - // that does not use Java String objects. - if (deserializeRead.currentBytes == null) { - throw new RuntimeException( - "null varchar entry: batchIndex " + batchIndex + " projection column num " + - projectionColumnNums[logicalColumnIndex]); - } - - int adjustedLength = StringExpr.truncate( - deserializeRead.currentBytes, - deserializeRead.currentBytesStart, - deserializeRead.currentBytesLength, - field.getMaxLength()); - - ((HiveVarcharWritable) convertSourceWritable).set( - new String( - deserializeRead.currentBytes, - deserializeRead.currentBytesStart, - adjustedLength, - Charsets.UTF_8), - -1); - } - break; - case CHAR: - { - // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method - // that does not use Java String objects. 
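
The VARCHAR and CHAR branches above both go straight from the raw UTF-8 bytes to a bounded value: VARCHAR uses StringExpr.truncate to cap the character length, while CHAR first right-trims blanks via StringExpr.rightTrimAndTruncate because CHAR is stored with padding stripped. A small sketch of the difference, assuming those helpers behave as they are used in the hunks above:

    import java.nio.charset.StandardCharsets;
    import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;

    public class CharVarcharTrimSketch {
      public static void main(String[] args) {
        byte[] bytes = "ab  ".getBytes(StandardCharsets.UTF_8);  // value with trailing blanks
        int maxLength = 10;

        // VARCHAR(10): only truncates, so trailing blanks survive.
        int varcharLen = StringExpr.truncate(bytes, 0, bytes.length, maxLength);

        // CHAR(10): trailing blanks are trimmed before truncation.
        int charLen = StringExpr.rightTrimAndTruncate(bytes, 0, bytes.length, maxLength);

        System.out.println("varchar -> '" + new String(bytes, 0, varcharLen, StandardCharsets.UTF_8) + "'");
        System.out.println("char    -> '" + new String(bytes, 0, charLen, StandardCharsets.UTF_8) + "'");
      }
    }
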
- if (deserializeRead.currentBytes == null) { - throw new RuntimeException( - "null char entry: batchIndex " + batchIndex + " projection column num " + - projectionColumnNums[logicalColumnIndex]); - } - - int adjustedLength = StringExpr.rightTrimAndTruncate( - deserializeRead.currentBytes, - deserializeRead.currentBytesStart, - deserializeRead.currentBytesLength, - field.getMaxLength()); - - ((HiveCharWritable) convertSourceWritable).set( - new String( - deserializeRead.currentBytes, - deserializeRead.currentBytesStart, - adjustedLength, Charsets.UTF_8), - -1); - } - break; - case DECIMAL: - ((HiveDecimalWritable) convertSourceWritable).set( - deserializeRead.currentHiveDecimalWritable); - break; - case INTERVAL_YEAR_MONTH: - ((HiveIntervalYearMonthWritable) convertSourceWritable).set( - deserializeRead.currentHiveIntervalYearMonthWritable); - break; - case INTERVAL_DAY_TIME: - ((HiveIntervalDayTimeWritable) convertSourceWritable).set( - deserializeRead.currentHiveIntervalDayTimeWritable); - break; - default: - throw new RuntimeException("Primitive category " + field.getPrimitiveCategory() + - " not supported"); + adjustedLength, Charsets.UTF_8), + -1); + } + break; + case DECIMAL: + { + if (writable == null) { + writable = new HiveDecimalWritable(); + } + ((HiveDecimalWritable) writable).set( + deserializeRead.currentHiveDecimalWritable); + } + break; + case INTERVAL_YEAR_MONTH: + { + if (writable == null) { + writable = new HiveIntervalYearMonthWritable(); + } + ((HiveIntervalYearMonthWritable) writable).set( + deserializeRead.currentHiveIntervalYearMonthWritable); + } + break; + case INTERVAL_DAY_TIME: + { + if (writable == null) { + writable = new HiveIntervalDayTimeWritable(); } + ((HiveIntervalDayTimeWritable) writable).set( + deserializeRead.currentHiveIntervalDayTimeWritable); } break; + default: + throw new RuntimeException("Primitive category " + field.getPrimitiveCategory() + + " not supported"); + } + return writable; + } - case STRUCT: - case UNION: - // The only aspect of conversion to Struct / Union themselves is add fields as NULL on the end - // (no removal from end? which would mean skipping fields...) + private Object convertListRowColumn( + ColumnVector colVector, int batchIndex, Field field) throws IOException { - // UNDONE - break; + final SettableListObjectInspector listOI = (SettableListObjectInspector) field.objectInspector; + final ListComplexTypeHelper listHelper = (ListComplexTypeHelper) field.getComplexHelper(); + final Field elementField = listHelper.getElementField(); + final List tempList = new ArrayList<>(); + final ListColumnVector listColumnVector = (ListColumnVector) colVector; - case LIST: - case MAP: - // Conversion only happens below to List elements or Map key and/or values and not to the - // List or Map itself. - default: - throw new RuntimeException("Category " + field.getCategory() + " not supported"); + while (deserializeRead.isNextComplexMultiValue()) { + tempList.add( + convertComplexFieldRowColumn(listColumnVector.child, batchIndex, elementField)); } - /* - * Convert our source object we just read into the target object and store that in the - * VectorizedRowBatch. 
- */ - convertVectorAssignRow.assignConvertRowColumn(batch, batchIndex, logicalColumnIndex, - convertSourceWritable); + final int size = tempList.size(); + final Object list = listOI.create(size); + for (int i = 0; i < size; i++) { + listOI.set(list, i, tempList.get(i)); + } + return list; + } + + private Object convertMapRowColumn( + ColumnVector colVector, int batchIndex, Field field) throws IOException { + + final SettableMapObjectInspector mapOI = (SettableMapObjectInspector) field.objectInspector; + final MapComplexTypeHelper mapHelper = (MapComplexTypeHelper) field.getComplexHelper(); + final Field keyField = mapHelper.getKeyField(); + final Field valueField = mapHelper.getValueField(); + final MapColumnVector mapColumnVector = (MapColumnVector) colVector; + + final Object map = mapOI.create(); + while (deserializeRead.isNextComplexMultiValue()) { + final Object key = convertComplexFieldRowColumn(mapColumnVector.keys, batchIndex, keyField); + final Object value = convertComplexFieldRowColumn(mapColumnVector.values, batchIndex, valueField); + mapOI.put(map, key, value); + } + return map; + } + + private Object convertStructRowColumn( + ColumnVector colVector, int batchIndex, Field field) throws IOException { + + final SettableStructObjectInspector structOI = (SettableStructObjectInspector) field.objectInspector; + final List structFields = structOI.getAllStructFieldRefs(); + final StructComplexTypeHelper structHelper = (StructComplexTypeHelper) field.getComplexHelper(); + final Field[] fields = structHelper.getFields(); + final StructColumnVector structColumnVector = (StructColumnVector) colVector; + + final Object struct = structOI.create(); + for (int i = 0; i < fields.length; i++) { + final Object fieldObject = + convertComplexFieldRowColumn(structColumnVector.fields[i], batchIndex, fields[i]); + structOI.setStructFieldData(struct, structFields.get(i), fieldObject); + } + deserializeRead.finishComplexVariableFieldsType(); + return struct; + } + + private Object convertUnionRowColumn( + ColumnVector colVector, int batchIndex, Field field) throws IOException { + + final SettableUnionObjectInspector unionOI = (SettableUnionObjectInspector) field.objectInspector; + final UnionComplexTypeHelper unionHelper = (UnionComplexTypeHelper) field.getComplexHelper(); + final Field[] fields = unionHelper.getFields(); + final UnionColumnVector unionColumnVector = (UnionColumnVector) colVector; + + final Object union = unionOI.create(); + final int tag = deserializeRead.currentInt; + unionOI.addField(union, new StandardUnion((byte) tag, + convertComplexFieldRowColumn(unionColumnVector.fields[tag], batchIndex, fields[tag]))); + deserializeRead.finishComplexVariableFieldsType(); + return union; } /** diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java index 1201499e23..23fdaa554f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java @@ -257,8 +257,14 @@ Object extractRowColumn( final int start = bytesColVector.start[adjustedIndex]; final int length = bytesColVector.length[adjustedIndex]; - if (bytes == null) { - LOG.info("null binary entry: batchIndex " + batchIndex); + if (bytesColVector.isRepeating) { + if (!bytesColVector.isNull[0] && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } + } else { + if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && 
bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } } BytesWritable bytesWritable = (BytesWritable) primitiveWritable; @@ -273,8 +279,14 @@ Object extractRowColumn( final int start = bytesColVector.start[adjustedIndex]; final int length = bytesColVector.length[adjustedIndex]; - if (bytes == null) { - nullBytesReadError(primitiveCategory, batchIndex); + if (bytesColVector.isRepeating) { + if (!bytesColVector.isNull[0] && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } + } else { + if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } } // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. @@ -289,8 +301,14 @@ Object extractRowColumn( final int start = bytesColVector.start[adjustedIndex]; final int length = bytesColVector.length[adjustedIndex]; - if (bytes == null) { - nullBytesReadError(primitiveCategory, batchIndex); + if (bytesColVector.isRepeating) { + if (!bytesColVector.isNull[0] && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } + } else { + if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } } final int adjustedLength = StringExpr.truncate(bytes, start, length, @@ -308,8 +326,14 @@ Object extractRowColumn( final int start = bytesColVector.start[adjustedIndex]; final int length = bytesColVector.length[adjustedIndex]; - if (bytes == null) { - nullBytesReadError(primitiveCategory, batchIndex); + if (bytesColVector.isRepeating) { + if (!bytesColVector.isNull[0] && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } + } else { + if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } } final int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, diff --git ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q index 02f7c751e7..131a1af262 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vec_part_all_complex.q @@ -12,9 +12,6 @@ set hive.metastore.disallow.incompatible.col.type.changes=false; set hive.default.fileformat=textfile; set hive.llap.io.enabled=false; --- TEMPORARY UNTIL Vectorized Text Schema Evolution works. -set hive.vectorized.complex.types.enabled=false; - -- SORT_QUERY_RESULTS -- -- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Partitioned --> all complex conversions diff --git ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q index d780074bc7..b4a9d664e2 100644 --- ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q +++ ql/src/test/queries/clientpositive/schema_evol_text_vecrow_part_all_complex.q @@ -12,9 +12,6 @@ set hive.metastore.disallow.incompatible.col.type.changes=false; set hive.default.fileformat=textfile; set hive.llap.io.enabled=false; --- TEMPORARY UNTIL Vectorized Text Schema Evolution works. 
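
The VectorExtractRow hunks above tighten the "null bytes" sanity check: a missing byte[] is only an error when the entry is logically non-null, and for a repeating column entry 0 stands for the whole batch. A standalone sketch of that guard (field names follow BytesColumnVector; the example data is made up):

    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;

    public class NullBytesCheckSketch {
      // True only when the column says the entry has a value but no bytes are present.
      static boolean isUnexpectedNullBytes(BytesColumnVector col, int batchIndex) {
        final int adjustedIndex = col.isRepeating ? 0 : batchIndex;
        final byte[] bytes = col.vector[adjustedIndex];
        if (col.isRepeating) {
          return !col.isNull[0] && bytes == null;
        }
        return (col.noNulls || !col.isNull[batchIndex]) && bytes == null;
      }

      public static void main(String[] args) {
        BytesColumnVector col = new BytesColumnVector(4);
        col.noNulls = false;
        col.isNull[1] = true;          // entry 1 is a legitimate NULL
        System.out.println(isUnexpectedNullBytes(col, 1));  // false: no error for a real NULL
      }
    }
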
-set hive.vectorized.complex.types.enabled=false; - -- SORT_QUERY_RESULTS -- -- FILE VARIATION: TEXTFILE, Non-Vectorized, MapWork, Partitioned --> all complex conversions diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out index 0ea29727ce..5e0c8f709a 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out @@ -156,24 +156,42 @@ STAGE PLANS: TableScan alias: part_change_various_various_struct1 Statistics: Num rows: 6 Data size: 931 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: insert_num (type: int), part (type: int), s1 (type: struct), b (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 1, 2] Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type STRUCT not enabled (struct) since hive.vectorized.complex.types.enabled IS false - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, s1:struct, b:string + partitionColumnCount: 1 + partitionColumns: part:int Stage: Stage-0 Fetch Operator @@ -438,24 +456,42 @@ STAGE PLANS: TableScan alias: part_add_various_various_struct2 Statistics: Num rows: 8 Data size: 939 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 1, 2] Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Select 
expression for SELECT operator: Vectorizing complex type STRUCT not enabled (struct) since hive.vectorized.complex.types.enabled IS false - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, b:string, s2:struct + partitionColumnCount: 1 + partitionColumns: part:int Stage: Stage-0 Fetch Operator @@ -648,24 +684,42 @@ STAGE PLANS: TableScan alias: part_add_to_various_various_struct4 Statistics: Num rows: 4 Data size: 353 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: insert_num (type: int), part (type: int), b (type: string), s3 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 1, 2] Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type STRUCT not enabled (struct) since hive.vectorized.complex.types.enabled IS false - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, b:string, s3:struct + partitionColumnCount: 1 + partitionColumns: part:int Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out index 70be462886..afaf69faf3 100644 --- ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out +++ ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out @@ -156,24 +156,42 @@ STAGE PLANS: TableScan alias: part_change_various_various_struct1 Statistics: Num rows: 6 Data size: 931 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: insert_num (type: int), part (type: int), s1 (type: struct), b (type: string) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 1, 2] Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - 
Execution mode: llap + Execution mode: vectorized, llap Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type STRUCT not enabled (struct) since hive.vectorized.complex.types.enabled IS false - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, s1:struct, b:string + partitionColumnCount: 1 + partitionColumns: part:int Stage: Stage-0 Fetch Operator @@ -438,24 +456,42 @@ STAGE PLANS: TableScan alias: part_add_various_various_struct2 Statistics: Num rows: 8 Data size: 939 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 1, 2] Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type STRUCT not enabled (struct) since hive.vectorized.complex.types.enabled IS false - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, b:string, s2:struct + partitionColumnCount: 1 + partitionColumns: part:int Stage: Stage-0 Fetch Operator @@ -648,24 +684,42 @@ STAGE PLANS: TableScan alias: part_add_to_various_various_struct4 Statistics: Num rows: 4 Data size: 353 Basic stats: COMPLETE Column stats: PARTIAL + TableScan Vectorization: + native: true + projectedOutputColumns: [0, 1, 2, 3] Select Operator expressions: insert_num (type: int), part (type: int), b (type: string), s3 (type: struct) outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumns: [0, 3, 1, 2] Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap + Execution mode: vectorized, llap Map Vectorization: enabled: true enabledConditionsMet: 
hive.vectorized.use.row.serde.deserialize IS true + groupByVectorOutput: true inputFileFormats: org.apache.hadoop.mapred.TextInputFormat - notVectorizedReason: Select expression for SELECT operator: Vectorizing complex type STRUCT not enabled (struct) since hive.vectorized.complex.types.enabled IS false - vectorized: false + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: insert_num:int, b:string, s3:struct + partitionColumnCount: 1 + partitionColumns: part:int Stage: Stage-0 Fetch Operator diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java index 9b3f3d0fa8..8cf7c47145 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java @@ -1060,6 +1060,13 @@ public boolean readComplexField() throws IOException { return doReadField(fields[structHelper.nextFieldIndex++]); } else { + // Parse until field separator (currentLevel). + fieldEnd = parseComplexField(fieldPosition, complexFieldEnd, currentLevel); + currentFieldLength = fieldEnd - fieldPosition; + + structHelper.nextFieldIndex = 0; + boolean result = doReadField(fields[fields.length - 1]); + if (!isEscaped) { // No parsing necessary -- the end is the parent's end. @@ -1067,13 +1074,10 @@ public boolean readComplexField() throws IOException { currentEscapeCount = 0; } else { // We must parse to get the escape count. - fieldEnd = parseComplexField(fieldPosition, complexFieldEnd, currentLevel - 1); + parseComplexField(fieldPosition, complexFieldEnd, currentLevel - 1); } - currentFieldLength = complexFieldEnd - fieldPosition; - - structHelper.nextFieldIndex = 0; - return doReadField(fields[fields.length - 1]); + return result; } } case UNION: diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java index a6cda4e818..886c298852 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java @@ -842,7 +842,7 @@ public static String getString(Object o, PrimitiveObjectInspector oi) { } break; case BOOLEAN: - result = String.valueOf((((BooleanObjectInspector) oi).get(o))); + result = (((BooleanObjectInspector) oi).get(o)) ? "TRUE" : "FALSE"; break; case BYTE: result = String.valueOf((((ByteObjectInspector) oi).get(o)));
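
The last hunk changes PrimitiveObjectInspectorUtils.getString so a boolean converts to "TRUE"/"FALSE" rather than Java's String.valueOf form, presumably to match the upper-case text representation used for boolean columns elsewhere and the existing non-vectorized q.out results. A small sketch of the conversion path:

    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
    import org.apache.hadoop.io.BooleanWritable;

    public class BooleanToStringSketch {
      public static void main(String[] args) {
        // With the patch applied, boolean-to-string conversion (e.g. schema evolution
        // from BOOLEAN to STRING/CHAR/VARCHAR) produces the upper-case form.
        String s = PrimitiveObjectInspectorUtils.getString(
            new BooleanWritable(true),
            PrimitiveObjectInspectorFactory.writableBooleanObjectInspector);
        System.out.println(s);  // "TRUE" with this change, "true" before it
      }
    }
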