diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java index b0d1c75589..ff3b359226 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java @@ -23,7 +23,10 @@ import java.util.List; import java.util.Map; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; @@ -53,7 +56,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -106,7 +108,7 @@ * These members have information for data type conversion. * Not defined if there is no conversion. */ - PrimitiveObjectInspector[] convertSourcePrimitiveObjectInspectors; + ObjectInspector[] convertSourceOI; // The primitive object inspector of the source data type for any column being // converted. Otherwise, null. @@ -128,7 +130,7 @@ private void allocateArrays(int count) { * Allocate the source conversion related arrays (optional). */ private void allocateConvertArrays(int count) { - convertSourcePrimitiveObjectInspectors = new PrimitiveObjectInspector[count]; + convertSourceOI = new ObjectInspector[count]; convertTargetWritables = new Writable[count]; } @@ -163,12 +165,10 @@ private void initTargetEntry(int logicalColumnIndex, int projectionColumnNum, Ty private void initConvertSourceEntry(int logicalColumnIndex, TypeInfo convertSourceTypeInfo) { isConvert[logicalColumnIndex] = true; final Category convertSourceCategory = convertSourceTypeInfo.getCategory(); - if (convertSourceCategory == Category.PRIMITIVE) { - final PrimitiveTypeInfo convertSourcePrimitiveTypeInfo = (PrimitiveTypeInfo) convertSourceTypeInfo; - convertSourcePrimitiveObjectInspectors[logicalColumnIndex] = - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - convertSourcePrimitiveTypeInfo); + convertSourceOI[logicalColumnIndex] = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(convertSourceTypeInfo); + if (convertSourceCategory == Category.PRIMITIVE) { // These need to be based on the target. final PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) targetTypeInfos[logicalColumnIndex]).getPrimitiveCategory(); @@ -248,6 +248,15 @@ public void init(List typeNames) throws HiveException { } } + /* + * Initialize using one target data type info. 
+ */ + public void init(TypeInfo typeInfo, int outputColumnNum) throws HiveException { + + allocateArrays(1); + initTargetEntry(0, outputColumnNum, typeInfo); + } + /** * Initialize for conversion from a provided (source) data types to the target data types * desired in the VectorizedRowBatch. @@ -289,36 +298,29 @@ public int initConversion(TypeInfo[] sourceTypeInfos, TypeInfo[] targetTypeInfos } else { final TypeInfo targetTypeInfo = targetTypeInfos[i]; + final TypeInfo sourceTypeInfo = sourceTypeInfos[i]; - if (targetTypeInfo.getCategory() != ObjectInspector.Category.PRIMITIVE) { + if (!sourceTypeInfo.equals(targetTypeInfo)) { - // For now, we don't have an assigner for complex types... + if (VectorPartitionConversion.isImplicitVectorColumnConversion( + sourceTypeInfo, targetTypeInfo)) { - } else { - final TypeInfo sourceTypeInfo = sourceTypeInfos[i]; - - if (!sourceTypeInfo.equals(targetTypeInfo)) { - - if (VectorPartitionConversion.isImplicitVectorColumnConversion( - sourceTypeInfo, targetTypeInfo)) { - - // Do implicit conversion accepting the source type and putting it in the same - // target type ColumnVector type. - initTargetEntry(i, i, sourceTypeInfo); - - } else { + // Do implicit conversion accepting the source type and putting it in the same + // target type ColumnVector type. + initTargetEntry(i, i, sourceTypeInfo); - // Do formal conversion... - initTargetEntry(i, i, targetTypeInfo); - initConvertSourceEntry(i, sourceTypeInfo); - - } } else { - // No conversion. + // Do formal conversion... initTargetEntry(i, i, targetTypeInfo); + initConvertSourceEntry(i, sourceTypeInfo); } + } else { + + // No conversion. + initTargetEntry(i, i, targetTypeInfo); + } } } @@ -365,53 +367,111 @@ private void assignRowColumn( VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; case BOOLEAN: - ((LongColumnVector) columnVector).vector[batchIndex] = - (((BooleanWritable) object).get() ? 1 : 0); + if (object instanceof Boolean) { + ((LongColumnVector) columnVector).vector[batchIndex] = + (((Boolean) object) ? 1 : 0); + } else { + ((LongColumnVector) columnVector).vector[batchIndex] = + (((BooleanWritable) object).get() ? 
1 : 0); + } break; case BYTE: - ((LongColumnVector) columnVector).vector[batchIndex] = - ((ByteWritable) object).get(); + if (object instanceof Byte) { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((Byte) object); + } else { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((ByteWritable) object).get(); + } break; case SHORT: - ((LongColumnVector) columnVector).vector[batchIndex] = - ((ShortWritable) object).get(); + if (object instanceof Short) { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((Short) object); + } else { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((ShortWritable) object).get(); + } break; case INT: - ((LongColumnVector) columnVector).vector[batchIndex] = - ((IntWritable) object).get(); + if (object instanceof Integer) { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((Integer) object); + } else { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((IntWritable) object).get(); + } break; case LONG: - ((LongColumnVector) columnVector).vector[batchIndex] = - ((LongWritable) object).get(); + if (object instanceof Long) { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((Long) object); + } else { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((LongWritable) object).get(); + } break; case TIMESTAMP: - ((TimestampColumnVector) columnVector).set( - batchIndex, ((TimestampWritable) object).getTimestamp()); + if (object instanceof Timestamp) { + ((TimestampColumnVector) columnVector).set( + batchIndex, ((Timestamp) object)); + } else { + ((TimestampColumnVector) columnVector).set( + batchIndex, ((TimestampWritable) object).getTimestamp()); + } break; case DATE: - ((LongColumnVector) columnVector).vector[batchIndex] = - ((DateWritable) object).getDays(); + if (object instanceof Date) { + ((LongColumnVector) columnVector).vector[batchIndex] = + DateWritable.dateToDays((Date) object); + } else { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((DateWritable) object).getDays(); + } break; case FLOAT: - ((DoubleColumnVector) columnVector).vector[batchIndex] = - ((FloatWritable) object).get(); + if (object instanceof Float) { + ((DoubleColumnVector) columnVector).vector[batchIndex] = + ((Float) object); + } else { + ((DoubleColumnVector) columnVector).vector[batchIndex] = + ((FloatWritable) object).get(); + } break; case DOUBLE: - ((DoubleColumnVector) columnVector).vector[batchIndex] = - ((DoubleWritable) object).get(); + if (object instanceof Double) { + ((DoubleColumnVector) columnVector).vector[batchIndex] = + ((Double) object); + } else { + ((DoubleColumnVector) columnVector).vector[batchIndex] = + ((DoubleWritable) object).get(); + } break; case BINARY: { - BytesWritable bw = (BytesWritable) object; - ((BytesColumnVector) columnVector).setVal( - batchIndex, bw.getBytes(), 0, bw.getLength()); + if (object instanceof byte[]) { + byte[] bytes = (byte[]) object; + ((BytesColumnVector) columnVector).setVal( + batchIndex, bytes, 0, bytes.length); + } else { + BytesWritable bw = (BytesWritable) object; + ((BytesColumnVector) columnVector).setVal( + batchIndex, bw.getBytes(), 0, bw.getLength()); + } } break; case STRING: { - Text tw = (Text) object; - ((BytesColumnVector) columnVector).setVal( - batchIndex, tw.getBytes(), 0, tw.getLength()); + if (object instanceof String) { + String string = (String) object; + byte[] bytes = string.getBytes(); + ((BytesColumnVector) columnVector).setVal( + batchIndex, bytes, 0, bytes.length); + } else { + Text tw = (Text) object; + 
((BytesColumnVector) columnVector).setVal( + batchIndex, tw.getBytes(), 0, tw.getLength()); + } } break; case VARCHAR: @@ -463,12 +523,22 @@ private void assignRowColumn( } break; case INTERVAL_YEAR_MONTH: - ((LongColumnVector) columnVector).vector[batchIndex] = - ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth().getTotalMonths(); + if (object instanceof HiveIntervalYearMonth) { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((HiveIntervalYearMonth) object).getTotalMonths(); + } else { + ((LongColumnVector) columnVector).vector[batchIndex] = + ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth().getTotalMonths(); + } break; case INTERVAL_DAY_TIME: - ((IntervalDayTimeColumnVector) columnVector).set( - batchIndex, ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime()); + if (object instanceof HiveIntervalDayTime) { + ((IntervalDayTimeColumnVector) columnVector).set( + batchIndex, (HiveIntervalDayTime) object); + } else { + ((IntervalDayTimeColumnVector) columnVector).set( + batchIndex, ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime()); + } break; default: throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() + @@ -517,18 +587,18 @@ private void assignRowColumn( case STRUCT: { final StructColumnVector structColumnVector = (StructColumnVector) columnVector; - final StructTypeInfo structTypeInfo = (StructTypeInfo) targetTypeInfo; - final List fieldStructTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); - final int size = fieldStructTypeInfos.size(); + final StructTypeInfo targetStructTypeInfo = (StructTypeInfo) targetTypeInfo; + final List targetFieldTypeInfos = targetStructTypeInfo.getAllStructFieldTypeInfos(); + final int size = targetFieldTypeInfos.size(); if (object instanceof List) { final List struct = (List) object; for (int i = 0; i < size; i++) { - assignRowColumn(structColumnVector.fields[i], batchIndex, fieldStructTypeInfos.get(i), struct.get(i)); + assignRowColumn(structColumnVector.fields[i], batchIndex, targetFieldTypeInfos.get(i), struct.get(i)); } } else { final Object[] array = (Object[]) object; for (int i = 0; i < size; i++) { - assignRowColumn(structColumnVector.fields[i], batchIndex, fieldStructTypeInfos.get(i), array[i]); + assignRowColumn(structColumnVector.fields[i], batchIndex, targetFieldTypeInfos.get(i), array[i]); } } } @@ -569,114 +639,136 @@ private void assignRowColumn( */ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex, Object object) { + Preconditions.checkState(isConvert[logicalColumnIndex]); - final Category targetCategory = targetTypeInfos[logicalColumnIndex].getCategory(); + final int projectionColumnNum = projectionColumnNums[logicalColumnIndex]; + assignConvertRowColumn( + batch.cols[projectionColumnNum], + batchIndex, + targetTypeInfos[logicalColumnIndex], + convertSourceOI[logicalColumnIndex], + convertTargetWritables[logicalColumnIndex], + object); + } + + private void assignConvertRowColumn(ColumnVector columnVector, int batchIndex, + TypeInfo targetTypeInfo, ObjectInspector sourceObjectInspector, + Writable convertTargetWritable, Object object) { + + final Category targetCategory = targetTypeInfo.getCategory(); if (targetCategory == null) { /* * This is a column that we don't want (i.e. not included) -- we are done. 
*/ return; } - final int projectionColumnNum = projectionColumnNums[logicalColumnIndex]; if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } try { switch (targetCategory) { case PRIMITIVE: + final PrimitiveObjectInspector sourcePrimitiveOI = + (PrimitiveObjectInspector) sourceObjectInspector; final PrimitiveCategory targetPrimitiveCategory = - ((PrimitiveTypeInfo) targetTypeInfos[logicalColumnIndex]).getPrimitiveCategory(); + ((PrimitiveTypeInfo) targetTypeInfo).getPrimitiveCategory(); switch (targetPrimitiveCategory) { case VOID: - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; case BOOLEAN: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = (PrimitiveObjectInspectorUtils.getBoolean( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]) ? 1 : 0); + object, sourcePrimitiveOI) ? 1 : 0); break; case BYTE: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getByte( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); break; case SHORT: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getShort( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); break; case INT: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getInt( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); break; case LONG: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getLong( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); break; case TIMESTAMP: { final Timestamp timestamp = PrimitiveObjectInspectorUtils.getTimestamp( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (timestamp == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((TimestampColumnVector) batch.cols[projectionColumnNum]).set( + ((TimestampColumnVector) columnVector).set( batchIndex, timestamp); } break; case DATE: { final Date date = PrimitiveObjectInspectorUtils.getDate( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (date == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - final DateWritable dateWritable = (DateWritable) convertTargetWritables[logicalColumnIndex]; + DateWritable dateWritable = (DateWritable) convertTargetWritable; + if (dateWritable == null) { + dateWritable = new DateWritable(); + } dateWritable.set(date); - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = 
+ ((LongColumnVector) columnVector).vector[batchIndex] = dateWritable.getDays(); } break; case FLOAT: - ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((DoubleColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getFloat( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); break; case DOUBLE: - ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((DoubleColumnVector) columnVector).vector[batchIndex] = PrimitiveObjectInspectorUtils.getDouble( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); break; case BINARY: { final BytesWritable bytesWritable = PrimitiveObjectInspectorUtils.getBinary( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (bytesWritable == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + ((BytesColumnVector) columnVector).setVal( batchIndex, bytesWritable.getBytes(), 0, bytesWritable.getLength()); } break; case STRING: { final String string = PrimitiveObjectInspectorUtils.getString( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (string == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - Text text = (Text) convertTargetWritables[logicalColumnIndex]; + Text text = (Text) convertTargetWritable; + if (text == null) { + text = new Text(); + } text.set(string); - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + ((BytesColumnVector) columnVector).setVal( batchIndex, text.getBytes(), 0, text.getLength()); } break; @@ -686,16 +778,16 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, final HiveVarchar hiveVarchar = PrimitiveObjectInspectorUtils.getHiveVarchar( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (hiveVarchar == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } // TODO: Do we need maxLength checking? byte[] bytes = hiveVarchar.getValue().getBytes(); - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + ((BytesColumnVector) columnVector).setVal( batchIndex, bytes, 0, bytes.length); } break; @@ -705,9 +797,9 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, final HiveChar hiveChar = PrimitiveObjectInspectorUtils.getHiveChar( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (hiveChar == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } // We store CHAR in vector row batch with padding stripped. @@ -715,7 +807,7 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, // TODO: Do we need maxLength checking? 
final byte[] bytes = hiveChar.getStrippedValue().getBytes(); - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + ((BytesColumnVector) columnVector).setVal( batchIndex, bytes, 0, bytes.length); } break; @@ -723,12 +815,12 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, { final HiveDecimal hiveDecimal = PrimitiveObjectInspectorUtils.getHiveDecimal( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (hiveDecimal == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((DecimalColumnVector) batch.cols[projectionColumnNum]).set( + ((DecimalColumnVector) columnVector).set( batchIndex, hiveDecimal); } break; @@ -736,12 +828,12 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, { final HiveIntervalYearMonth intervalYearMonth = PrimitiveObjectInspectorUtils.getHiveIntervalYearMonth( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (intervalYearMonth == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = intervalYearMonth.getTotalMonths(); } break; @@ -749,12 +841,12 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, { final HiveIntervalDayTime intervalDayTime = PrimitiveObjectInspectorUtils.getHiveIntervalDayTime( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + object, sourcePrimitiveOI); if (intervalDayTime == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((IntervalDayTimeColumnVector) batch.cols[projectionColumnNum]).set( + ((IntervalDayTimeColumnVector) columnVector).set( batchIndex, intervalDayTime); } break; @@ -763,18 +855,113 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, " not supported"); } break; + case LIST: + { + final ListColumnVector listColumnVector = (ListColumnVector) columnVector; + final ListObjectInspector sourceListOI = (ListObjectInspector) sourceObjectInspector; + final ObjectInspector sourceElementOI = sourceListOI.getListElementObjectInspector(); + final int size = sourceListOI.getListLength(object); + final TypeInfo targetElementTypeInfo = ((ListTypeInfo) targetTypeInfo).getListElementTypeInfo(); + + listColumnVector.offsets[batchIndex] = listColumnVector.childCount; + listColumnVector.childCount += size; + listColumnVector.ensureSize(listColumnVector.childCount, true); + listColumnVector.lengths[batchIndex] = size; + + for (int i = 0; i < size; i++) { + final Object element = sourceListOI.getListElement(object, i); + final int offset = (int) (listColumnVector.offsets[batchIndex] + i); + assignConvertRowColumn( + listColumnVector.child, + offset, + targetElementTypeInfo, + sourceElementOI, + null, + element); + } + } + break; + case MAP: + { + final MapColumnVector mapColumnVector = (MapColumnVector) columnVector; + final MapObjectInspector mapObjectInspector = (MapObjectInspector) sourceObjectInspector; + final MapTypeInfo mapTypeInfo = (MapTypeInfo) targetTypeInfo; + + final Map map = 
mapObjectInspector.getMap(object); + for (Map.Entry entry : map.entrySet()) { + assignConvertRowColumn( + mapColumnVector.keys, + batchIndex, + mapTypeInfo.getMapKeyTypeInfo(), + mapObjectInspector.getMapKeyObjectInspector(), + null, + entry.getKey()); + assignConvertRowColumn( + mapColumnVector.values, + batchIndex, + mapTypeInfo.getMapValueTypeInfo(), + mapObjectInspector.getMapValueObjectInspector(), + null, + entry.getValue()); + } + } + break; + case STRUCT: + { + final StructColumnVector structColumnVector = (StructColumnVector) columnVector; + final StructObjectInspector sourceStructOI = (StructObjectInspector) sourceObjectInspector; + final List sourceFields = sourceStructOI.getAllStructFieldRefs(); + final StructTypeInfo targetStructTypeInfo = (StructTypeInfo) targetTypeInfo; + final List targetTypeInfos = targetStructTypeInfo.getAllStructFieldTypeInfos(); + final int size = targetTypeInfos.size(); + + for (int i = 0; i < size; i++) { + if (i < sourceFields.size()) { + final StructField sourceStructField = sourceFields.get(i); + final ObjectInspector sourceFieldOI = sourceStructField.getFieldObjectInspector(); + final Object sourceData = sourceStructOI.getStructFieldData(object, sourceStructField); + assignConvertRowColumn( + structColumnVector.fields[i], + batchIndex, + targetTypeInfos.get(i), + sourceFieldOI, + null, + sourceData); + } else { + final ColumnVector fieldColumnVector = structColumnVector.fields[i]; + VectorizedBatchUtil.setNullColIsNullValue(fieldColumnVector, batchIndex); + } + } + } + break; + case UNION: + { + final UnionColumnVector unionColumnVector = (UnionColumnVector) columnVector; + final UnionObjectInspector unionObjectInspector = (UnionObjectInspector) sourceObjectInspector; + final UnionTypeInfo unionTypeInfo = (UnionTypeInfo) targetTypeInfo; + final int tag = unionObjectInspector.getTag(object); + + assignConvertRowColumn( + unionColumnVector.fields[tag], + batchIndex, + unionTypeInfo.getAllUnionObjectTypeInfos().get(tag), + unionObjectInspector.getObjectInspectors().get(tag), + null, + unionObjectInspector.getField(tag)); + } + break; default: throw new RuntimeException("Category " + targetCategory.name() + " not supported"); } } catch (NumberFormatException e) { // Some of the conversion methods throw this exception on numeric parsing errors. - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } // We always set the null flag to false when there is a value. 
- batch.cols[projectionColumnNum].isNull[batchIndex] = false; + columnVector.isNull[batchIndex] = false; } /* diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java index e37816f0c4..c7c667f9aa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java @@ -40,7 +40,16 @@ import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.SettableListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableUnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; @@ -55,7 +64,6 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; import com.google.common.base.Charsets; import com.google.common.base.Preconditions; @@ -112,7 +120,7 @@ private VectorDeserializeRow() { * This member has information for data type conversion. * Not defined if there is no conversion. */ - Writable conversionWritable; + private Object conversionWritable; // Conversion requires source be placed in writable so we can call upon // VectorAssignRow to convert and assign the row column. @@ -120,6 +128,8 @@ private VectorDeserializeRow() { // For a complex type, a helper object that describes elements, key/value pairs, // or fields. + private ObjectInspector objectInspector; + public Field(PrimitiveCategory primitiveCategory, int maxLength) { this.category = Category.PRIMITIVE; this.primitiveCategory = primitiveCategory; @@ -127,10 +137,13 @@ public Field(PrimitiveCategory primitiveCategory, int maxLength) { this.isConvert = false; this.conversionWritable = null; this.complexTypeHelper = null; + this.objectInspector = PrimitiveObjectInspectorFactory. 
+ getPrimitiveWritableObjectInspector(primitiveCategory); } - public Field(Category category, ComplexTypeHelper complexTypeHelper) { + public Field(Category category, ComplexTypeHelper complexTypeHelper, TypeInfo typeInfo) { this.category = category; + this.objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo); this.primitiveCategory = null; this.maxLength = 0; this.isConvert = false; @@ -158,17 +171,21 @@ public boolean getIsConvert() { return isConvert; } - public void setConversionWritable(Writable conversionWritable) { + public void setConversionWritable(Object conversionWritable) { this.conversionWritable = conversionWritable; } - public Writable getConversionWritable() { + public Object getConversionWritable() { return conversionWritable; } public ComplexTypeHelper getComplexHelper() { return complexTypeHelper; } + + public ObjectInspector getObjectInspector() { + return objectInspector; + } } /* @@ -191,7 +208,7 @@ public ComplexTypeHelper getComplexHelper() { private Field topLevelFields[]; - VectorAssignRow convertVectorAssignRow; + private VectorAssignRow convertVectorAssignRow; // Use its conversion ability. /* @@ -231,7 +248,7 @@ private Field allocateComplexField(TypeInfo sourceTypeInfo) { final ListComplexTypeHelper listHelper = new ListComplexTypeHelper( allocateField(listTypeInfo.getListElementTypeInfo())); - return new Field(category, listHelper); + return new Field(category, listHelper, sourceTypeInfo); } case MAP: { @@ -240,7 +257,7 @@ private Field allocateComplexField(TypeInfo sourceTypeInfo) { new MapComplexTypeHelper( allocateField(mapTypeInfo.getMapKeyTypeInfo()), allocateField(mapTypeInfo.getMapValueTypeInfo())); - return new Field(category, mapHelper); + return new Field(category, mapHelper, sourceTypeInfo); } case STRUCT: { @@ -253,7 +270,7 @@ private Field allocateComplexField(TypeInfo sourceTypeInfo) { } final StructComplexTypeHelper structHelper = new StructComplexTypeHelper(fields); - return new Field(category, structHelper); + return new Field(category, structHelper, sourceTypeInfo); } case UNION: { @@ -266,7 +283,7 @@ private Field allocateComplexField(TypeInfo sourceTypeInfo) { } final UnionComplexTypeHelper unionHelper = new UnionComplexTypeHelper(fields); - return new Field(category, unionHelper); + return new Field(category, unionHelper, sourceTypeInfo); } default: throw new RuntimeException("Category " + category + " not supported"); @@ -711,8 +728,7 @@ private void storeComplexFieldRowColumn(ColumnVector fieldColVector, Field field, int batchIndex, boolean canRetainByteRef) throws IOException { if (!deserializeRead.readComplexField()) { - fieldColVector.isNull[batchIndex] = true; - fieldColVector.noNulls = false; + VectorizedBatchUtil.setNullColIsNullValue(fieldColVector, batchIndex); return; } @@ -735,6 +751,8 @@ private void storeComplexFieldRowColumn(ColumnVector fieldColVector, default: throw new RuntimeException("Category " + field.getCategory() + " not supported"); } + + fieldColVector.isNull[batchIndex] = false; } private void storeListRowColumn(ColumnVector colVector, @@ -867,7 +885,11 @@ private void storeRowColumn(VectorizedRowBatch batch, int batchIndex, Field field, int logicalColumnIndex, boolean canRetainByteRef) throws IOException { final int projectionColumnNum = projectionColumnNums[logicalColumnIndex]; - ColumnVector colVector = batch.cols[projectionColumnNum]; + final ColumnVector colVector = batch.cols[projectionColumnNum]; + if (!deserializeRead.readNextField()) { + 
VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + return; + } switch (field.getCategory()) { case PRIMITIVE: @@ -890,7 +912,7 @@ private void storeRowColumn(VectorizedRowBatch batch, int batchIndex, } // We always set the null flag to false when there is a value. - batch.cols[projectionColumnNum].isNull[batchIndex] = false; + colVector.isNull[batchIndex] = false; } /** @@ -907,156 +929,345 @@ private void storeRowColumn(VectorizedRowBatch batch, int batchIndex, private void convertRowColumn(VectorizedRowBatch batch, int batchIndex, Field field, int logicalColumnIndex) throws IOException { - Writable convertSourceWritable = field.getConversionWritable(); + final int projectionColumnIndex = projectionColumnNums[logicalColumnIndex]; + final ColumnVector colVector = batch.cols[projectionColumnIndex]; + if (!deserializeRead.readNextField()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + return; + } + + final Object convertSourceWritable; switch (field.getCategory()) { case PRIMITIVE: + convertSourceWritable = + convertPrimitiveRowColumn(batchIndex, field); + break; + case LIST: + convertSourceWritable = + convertListRowColumn(colVector, batchIndex, field); + break; + case MAP: + convertSourceWritable = + convertMapRowColumn(colVector, batchIndex, field); + break; + case STRUCT: + convertSourceWritable = + convertStructRowColumn(colVector, batchIndex, field); + break; + case UNION: + convertSourceWritable = + convertUnionRowColumn(colVector, batchIndex, field); + break; + default: + throw new RuntimeException(); + } + + /* + * Convert our source object we just read into the target object and store that in the + * VectorizedRowBatch. + */ + convertVectorAssignRow.assignConvertRowColumn( + batch, batchIndex, logicalColumnIndex, convertSourceWritable); + } + + private Object convertComplexFieldRowColumn(ColumnVector colVector, int batchIndex, + Field field) throws IOException { + + if (!deserializeRead.readComplexField()) { + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); + return null; + } + + colVector.isNull[batchIndex] = false; + + switch (field.getCategory()) { + case PRIMITIVE: + return convertPrimitiveRowColumn(batchIndex, field); + case LIST: + return convertListRowColumn(colVector, batchIndex, field); + case MAP: + return convertMapRowColumn(colVector, batchIndex, field); + case STRUCT: + return convertStructRowColumn(colVector, batchIndex, field); + case UNION: + return convertUnionRowColumn(colVector, batchIndex, field); + default: + throw new RuntimeException(); + } + } + + private Object convertPrimitiveRowColumn(int batchIndex, Field field) throws IOException { + + Object writable = field.getConversionWritable(); + switch (field.getPrimitiveCategory()) { + case VOID: + writable = null; + break; + case BOOLEAN: { - switch (field.getPrimitiveCategory()) { - case VOID: - convertSourceWritable = null; - break; - case BOOLEAN: - ((BooleanWritable) convertSourceWritable).set(deserializeRead.currentBoolean); - break; - case BYTE: - ((ByteWritable) convertSourceWritable).set(deserializeRead.currentByte); - break; - case SHORT: - ((ShortWritable) convertSourceWritable).set(deserializeRead.currentShort); - break; - case INT: - ((IntWritable) convertSourceWritable).set(deserializeRead.currentInt); - break; - case LONG: - ((LongWritable) convertSourceWritable).set(deserializeRead.currentLong); - break; - case TIMESTAMP: - ((TimestampWritable) convertSourceWritable).set(deserializeRead.currentTimestampWritable); - break; - case DATE: - 
((DateWritable) convertSourceWritable).set(deserializeRead.currentDateWritable); - break; - case FLOAT: - ((FloatWritable) convertSourceWritable).set(deserializeRead.currentFloat); - break; - case DOUBLE: - ((DoubleWritable) convertSourceWritable).set(deserializeRead.currentDouble); - break; - case BINARY: - if (deserializeRead.currentBytes == null) { - LOG.info( - "null binary entry: batchIndex " + batchIndex + " projection column num " + - projectionColumnNums[logicalColumnIndex]); - } + if (writable == null) { + writable = new BooleanWritable(); + } + ((BooleanWritable) writable).set(deserializeRead.currentBoolean); + } + break; + case BYTE: + { + if (writable == null) { + writable = new ByteWritable(); + } + ((ByteWritable) writable).set(deserializeRead.currentByte); + } + break; + case SHORT: + { + if (writable == null) { + writable = new ShortWritable(); + } + ((ShortWritable) writable).set(deserializeRead.currentShort); + } + break; + case INT: + { + if (writable == null) { + writable = new IntWritable(); + } + ((IntWritable) writable).set(deserializeRead.currentInt); + } + break; + case LONG: + { + if (writable == null) { + writable = new LongWritable(); + } + ((LongWritable) writable).set(deserializeRead.currentLong); + } + break; + case TIMESTAMP: + { + if (writable == null) { + writable = new TimestampWritable(); + } + ((TimestampWritable) writable).set(deserializeRead.currentTimestampWritable); + } + break; + case DATE: + { + if (writable == null) { + writable = new DateWritable(); + } + ((DateWritable) writable).set(deserializeRead.currentDateWritable); + } + break; + case FLOAT: + { + if (writable == null) { + writable = new FloatWritable(); + } + ((FloatWritable) writable).set(deserializeRead.currentFloat); + } + break; + case DOUBLE: + { + if (writable == null) { + writable = new DoubleWritable(); + } + ((DoubleWritable) writable).set(deserializeRead.currentDouble); + } + break; + case BINARY: + { + if (writable == null) { + writable = new BytesWritable(); + } + if (deserializeRead.currentBytes == null) { + LOG.info( + "null binary entry: batchIndex " + batchIndex); + } - ((BytesWritable) convertSourceWritable).set( + ((BytesWritable) writable).set( + deserializeRead.currentBytes, + deserializeRead.currentBytesStart, + deserializeRead.currentBytesLength); + break; + } + case STRING: + { + if (writable == null) { + writable = new Text(); + } + if (deserializeRead.currentBytes == null) { + throw new RuntimeException( + "null string entry: batchIndex " + batchIndex); + } + + // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. + ((Text) writable).set( + deserializeRead.currentBytes, + deserializeRead.currentBytesStart, + deserializeRead.currentBytesLength); + } + break; + case VARCHAR: + { + if (writable == null) { + writable = new HiveVarcharWritable(); + } + // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method + // that does not use Java String objects. 
+ if (deserializeRead.currentBytes == null) { + throw new RuntimeException( + "null varchar entry: batchIndex " + batchIndex); + } + + int adjustedLength = StringExpr.truncate( + deserializeRead.currentBytes, + deserializeRead.currentBytesStart, + deserializeRead.currentBytesLength, + field.getMaxLength()); + + ((HiveVarcharWritable) writable).set( + new String( deserializeRead.currentBytes, deserializeRead.currentBytesStart, - deserializeRead.currentBytesLength); - break; - case STRING: - if (deserializeRead.currentBytes == null) { - throw new RuntimeException( - "null string entry: batchIndex " + batchIndex + " projection column num " + - projectionColumnNums[logicalColumnIndex]); - } + adjustedLength, + Charsets.UTF_8), + -1); + } + break; + case CHAR: + { + if (writable == null) { + writable = new HiveCharWritable(); + } + // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method + // that does not use Java String objects. + if (deserializeRead.currentBytes == null) { + throw new RuntimeException( + "null char entry: batchIndex " + batchIndex); + } + + int adjustedLength = StringExpr.rightTrimAndTruncate( + deserializeRead.currentBytes, + deserializeRead.currentBytesStart, + deserializeRead.currentBytesLength, + field.getMaxLength()); - // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. - ((Text) convertSourceWritable).set( + ((HiveCharWritable) writable).set( + new String( deserializeRead.currentBytes, deserializeRead.currentBytesStart, - deserializeRead.currentBytesLength); - break; - case VARCHAR: - { - // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method - // that does not use Java String objects. - if (deserializeRead.currentBytes == null) { - throw new RuntimeException( - "null varchar entry: batchIndex " + batchIndex + " projection column num " + - projectionColumnNums[logicalColumnIndex]); - } - - int adjustedLength = StringExpr.truncate( - deserializeRead.currentBytes, - deserializeRead.currentBytesStart, - deserializeRead.currentBytesLength, - field.getMaxLength()); - - ((HiveVarcharWritable) convertSourceWritable).set( - new String( - deserializeRead.currentBytes, - deserializeRead.currentBytesStart, - adjustedLength, - Charsets.UTF_8), - -1); - } - break; - case CHAR: - { - // Use the basic STRING bytes read to get access, then use our optimal truncate/trim method - // that does not use Java String objects. 
- if (deserializeRead.currentBytes == null) { - throw new RuntimeException( - "null char entry: batchIndex " + batchIndex + " projection column num " + - projectionColumnNums[logicalColumnIndex]); - } - - int adjustedLength = StringExpr.rightTrimAndTruncate( - deserializeRead.currentBytes, - deserializeRead.currentBytesStart, - deserializeRead.currentBytesLength, - field.getMaxLength()); - - ((HiveCharWritable) convertSourceWritable).set( - new String( - deserializeRead.currentBytes, - deserializeRead.currentBytesStart, - adjustedLength, Charsets.UTF_8), - -1); - } - break; - case DECIMAL: - ((HiveDecimalWritable) convertSourceWritable).set( - deserializeRead.currentHiveDecimalWritable); - break; - case INTERVAL_YEAR_MONTH: - ((HiveIntervalYearMonthWritable) convertSourceWritable).set( - deserializeRead.currentHiveIntervalYearMonthWritable); - break; - case INTERVAL_DAY_TIME: - ((HiveIntervalDayTimeWritable) convertSourceWritable).set( - deserializeRead.currentHiveIntervalDayTimeWritable); - break; - default: - throw new RuntimeException("Primitive category " + field.getPrimitiveCategory() + - " not supported"); + adjustedLength, Charsets.UTF_8), + -1); + } + break; + case DECIMAL: + { + if (writable == null) { + writable = new HiveDecimalWritable(); + } + ((HiveDecimalWritable) writable).set( + deserializeRead.currentHiveDecimalWritable); + } + break; + case INTERVAL_YEAR_MONTH: + { + if (writable == null) { + writable = new HiveIntervalYearMonthWritable(); } + ((HiveIntervalYearMonthWritable) writable).set( + deserializeRead.currentHiveIntervalYearMonthWritable); } break; + case INTERVAL_DAY_TIME: + { + if (writable == null) { + writable = new HiveIntervalDayTimeWritable(); + } + ((HiveIntervalDayTimeWritable) writable).set( + deserializeRead.currentHiveIntervalDayTimeWritable); + } + break; + default: + throw new RuntimeException("Primitive category " + field.getPrimitiveCategory() + + " not supported"); + } + return writable; + } - case STRUCT: - case UNION: - // The only aspect of conversion to Struct / Union themselves is add fields as NULL on the end - // (no removal from end? which would mean skipping fields...) + private Object convertListRowColumn( + ColumnVector colVector, int batchIndex, Field field) throws IOException { - // UNDONE - break; + final SettableListObjectInspector listOI = (SettableListObjectInspector) field.objectInspector; + final ListComplexTypeHelper listHelper = (ListComplexTypeHelper) field.getComplexHelper(); + final Field elementField = listHelper.getElementField(); + final List tempList = new ArrayList<>(); + final ListColumnVector listColumnVector = (ListColumnVector) colVector; - case LIST: - case MAP: - // Conversion only happens below to List elements or Map key and/or values and not to the - // List or Map itself. - default: - throw new RuntimeException("Category " + field.getCategory() + " not supported"); + while (deserializeRead.isNextComplexMultiValue()) { + tempList.add( + convertComplexFieldRowColumn(listColumnVector.child, batchIndex, elementField)); } - /* - * Convert our source object we just read into the target object and store that in the - * VectorizedRowBatch. 
- */ - convertVectorAssignRow.assignConvertRowColumn(batch, batchIndex, logicalColumnIndex, - convertSourceWritable); + final int size = tempList.size(); + final Object list = listOI.create(size); + for (int i = 0; i < size; i++) { + listOI.set(list, i, tempList.get(i)); + } + return list; + } + + private Object convertMapRowColumn( + ColumnVector colVector, int batchIndex, Field field) throws IOException { + + final SettableMapObjectInspector mapOI = (SettableMapObjectInspector) field.objectInspector; + final MapComplexTypeHelper mapHelper = (MapComplexTypeHelper) field.getComplexHelper(); + final Field keyField = mapHelper.getKeyField(); + final Field valueField = mapHelper.getValueField(); + final MapColumnVector mapColumnVector = (MapColumnVector) colVector; + + final Object map = mapOI.create(); + while (deserializeRead.isNextComplexMultiValue()) { + final Object key = convertComplexFieldRowColumn(mapColumnVector.keys, batchIndex, keyField); + final Object value = convertComplexFieldRowColumn(mapColumnVector.values, batchIndex, valueField); + mapOI.put(map, key, value); + } + return map; + } + + private Object convertStructRowColumn( + ColumnVector colVector, int batchIndex, Field field) throws IOException { + + final SettableStructObjectInspector structOI = (SettableStructObjectInspector) field.objectInspector; + final List structFields = structOI.getAllStructFieldRefs(); + final StructComplexTypeHelper structHelper = (StructComplexTypeHelper) field.getComplexHelper(); + final Field[] fields = structHelper.getFields(); + final StructColumnVector structColumnVector = (StructColumnVector) colVector; + + final Object struct = structOI.create(); + for (int i = 0; i < fields.length; i++) { + final Object fieldObject = + convertComplexFieldRowColumn(structColumnVector.fields[i], batchIndex, fields[i]); + structOI.setStructFieldData(struct, structFields.get(i), fieldObject); + } + deserializeRead.finishComplexVariableFieldsType(); + return struct; + } + + private Object convertUnionRowColumn( + ColumnVector colVector, int batchIndex, Field field) throws IOException { + + final SettableUnionObjectInspector unionOI = (SettableUnionObjectInspector) field.objectInspector; + final UnionComplexTypeHelper unionHelper = (UnionComplexTypeHelper) field.getComplexHelper(); + final Field[] fields = unionHelper.getFields(); + final UnionColumnVector unionColumnVector = (UnionColumnVector) colVector; + + final Object union = unionOI.create(); + final int tag = deserializeRead.currentInt; + unionOI.addField(union, new StandardUnion( + (byte) tag, convertComplexFieldRowColumn(unionColumnVector.fields[tag], batchIndex, fields[tag]))); + deserializeRead.finishComplexVariableFieldsType(); + return union; } /** @@ -1102,12 +1313,6 @@ public void deserialize(VectorizedRowBatch batch, int batchIndex) throws IOExcep deserializeRead.skipNextField(); continue; } - if (!deserializeRead.readNextField()) { - ColumnVector colVector = batch.cols[projectionColumnNum]; - colVector.isNull[batchIndex] = true; - colVector.noNulls = false; - continue; - } // The current* members of deserializeRead have the field value. field = topLevelFields[i]; if (field.getIsConvert()) { @@ -1120,13 +1325,6 @@ public void deserialize(VectorizedRowBatch batch, int batchIndex) throws IOExcep final int readFieldCount = readFieldLogicalIndices.length; for (int i = 0; i < readFieldCount; i++) { final int logicalIndex = readFieldLogicalIndices[i]; - // Jump to the field we want and read it. 
- if (!deserializeRead.readField(logicalIndex)) { - ColumnVector colVector = batch.cols[projectionColumnNums[logicalIndex]]; - colVector.isNull[batchIndex] = true; - colVector.noNulls = false; - continue; - } // The current* members of deserializeRead have the field value. field = topLevelFields[logicalIndex]; if (field.getIsConvert()) { @@ -1172,12 +1370,6 @@ public void deserializeByRef(VectorizedRowBatch batch, int batchIndex) throws IO deserializeRead.skipNextField(); continue; } - if (!deserializeRead.readNextField()) { - ColumnVector colVector = batch.cols[projectionColumnNum]; - colVector.isNull[batchIndex] = true; - colVector.noNulls = false; - continue; - } // The current* members of deserializeRead have the field value. field = topLevelFields[i]; if (field.getIsConvert()) { @@ -1190,13 +1382,6 @@ public void deserializeByRef(VectorizedRowBatch batch, int batchIndex) throws IO final int readFieldCount = readFieldLogicalIndices.length; for (int i = 0; i < readFieldCount; i++) { final int logicalIndex = readFieldLogicalIndices[i]; - // Jump to the field we want and read it. - if (!deserializeRead.readField(logicalIndex)) { - ColumnVector colVector = batch.cols[projectionColumnNums[logicalIndex]]; - colVector.isNull[batchIndex] = true; - colVector.noNulls = false; - continue; - } // The current* members of deserializeRead have the field value. field = topLevelFields[logicalIndex]; if (field.getIsConvert()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java index 1201499e23..23fdaa554f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java @@ -257,8 +257,14 @@ Object extractRowColumn( final int start = bytesColVector.start[adjustedIndex]; final int length = bytesColVector.length[adjustedIndex]; - if (bytes == null) { - LOG.info("null binary entry: batchIndex " + batchIndex); + if (bytesColVector.isRepeating) { + if (!bytesColVector.isNull[0] && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } + } else { + if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } } BytesWritable bytesWritable = (BytesWritable) primitiveWritable; @@ -273,8 +279,14 @@ Object extractRowColumn( final int start = bytesColVector.start[adjustedIndex]; final int length = bytesColVector.length[adjustedIndex]; - if (bytes == null) { - nullBytesReadError(primitiveCategory, batchIndex); + if (bytesColVector.isRepeating) { + if (!bytesColVector.isNull[0] && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } + } else { + if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } } // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String. 
@@ -289,8 +301,14 @@ Object extractRowColumn( final int start = bytesColVector.start[adjustedIndex]; final int length = bytesColVector.length[adjustedIndex]; - if (bytes == null) { - nullBytesReadError(primitiveCategory, batchIndex); + if (bytesColVector.isRepeating) { + if (!bytesColVector.isNull[0] && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } + } else { + if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } } final int adjustedLength = StringExpr.truncate(bytes, start, length, @@ -308,8 +326,14 @@ Object extractRowColumn( final int start = bytesColVector.start[adjustedIndex]; final int length = bytesColVector.length[adjustedIndex]; - if (bytes == null) { - nullBytesReadError(primitiveCategory, batchIndex); + if (bytesColVector.isRepeating) { + if (!bytesColVector.isNull[0] && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } + } else { + if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) { + nullBytesReadError(primitiveCategory, batchIndex); + } } final int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java index a6cda4e818..886c298852 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java @@ -842,7 +842,7 @@ public static String getString(Object o, PrimitiveObjectInspector oi) { } break; case BOOLEAN: - result = String.valueOf((((BooleanObjectInspector) oi).get(o))); + result = (((BooleanObjectInspector) oi).get(o)) ? "TRUE" : "FALSE"; break; case BYTE: result = String.valueOf((((ByteObjectInspector) oi).get(o)));
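
Reviewer note (illustration only, not part of the patch): the assignRowColumn hunks above repeat one pattern per primitive type -- accept either the Hadoop Writable wrapper or the plain Java value, and write the same primitive representation into the ColumnVector. The standalone Java sketch below restates that pattern for the LONG and STRING cases; the class and method names are invented for illustration, while the ColumnVector and Writable calls are the ones the patch itself uses.

    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;

    // Hypothetical helper showing the "Writable or plain Java object" assignment pattern.
    public class AssignPatternSketch {

      // LONG: LongColumnVector stores a primitive long either way.
      static void assignLong(LongColumnVector colVector, int batchIndex, Object object) {
        if (object instanceof Long) {
          colVector.vector[batchIndex] = (Long) object;
        } else {
          colVector.vector[batchIndex] = ((LongWritable) object).get();
        }
        colVector.isNull[batchIndex] = false;
      }

      // STRING: BytesColumnVector copies the value bytes either way.
      static void assignString(BytesColumnVector colVector, int batchIndex, Object object) {
        final byte[] bytes;
        final int length;
        if (object instanceof String) {
          bytes = ((String) object).getBytes();   // default charset, mirroring the patch
          length = bytes.length;
        } else {
          final Text text = (Text) object;
          bytes = text.getBytes();                // backing array may be longer than the value
          length = text.getLength();
        }
        colVector.setVal(batchIndex, bytes, 0, length);
        colVector.isNull[batchIndex] = false;
      }
    }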
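
Also for reference only: several hunks switch scratch Writables from being assumed pre-allocated to being allocated on demand (the DateWritable and Text objects in assignConvertRowColumn, and every primitive case in convertPrimitiveRowColumn). A minimal sketch of that fallback, with an invented class and method name:

    import java.sql.Date;

    import org.apache.hadoop.hive.serde2.io.DateWritable;

    // Hypothetical sketch: use the pre-allocated scratch Writable when one exists,
    // otherwise fall back to a fresh allocation, as the DATE and STRING hunks do.
    final class ScratchWritableSketch {

      static int dateToDays(Date date, DateWritable convertTargetWritable) {
        DateWritable dateWritable = convertTargetWritable;
        if (dateWritable == null) {
          dateWritable = new DateWritable();  // no scratch object was set up for this column
        }
        dateWritable.set(date);
        return dateWritable.getDays();
      }
    }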
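
Likewise illustrative rather than part of the patch: the VectorExtractRow hunks replace an unconditional bytes == null check with one that respects isRepeating, since a repeating vector only carries a meaningful value (and null flag) in entry 0. A sketch of that guard with an invented helper name:

    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;

    // Hypothetical guard mirroring the null-bytes sanity check added by the patch.
    final class BytesNullCheckSketch {

      static void checkNonNullBytes(BytesColumnVector bytesColVector, int batchIndex) {
        // A repeating vector shares entry 0 across all rows, so only isNull[0] matters.
        final int adjustedIndex = bytesColVector.isRepeating ? 0 : batchIndex;
        final byte[] bytes = bytesColVector.vector[adjustedIndex];
        if (bytesColVector.isRepeating) {
          if (!bytesColVector.isNull[0] && bytes == null) {
            throw new RuntimeException("null bytes for non-null repeating entry");
          }
        } else {
          if ((bytesColVector.noNulls || !bytesColVector.isNull[batchIndex]) && bytes == null) {
            throw new RuntimeException("null bytes for non-null entry at batchIndex " + batchIndex);
          }
        }
      }
    }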