diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java index 9c84937031..cbe87bf8ef 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java @@ -18,12 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector; -import java.sql.Date; -import java.sql.Timestamp; -import java.util.List; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import com.google.common.base.Preconditions; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -41,18 +36,27 @@ import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; @@ -61,8 +65,17 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -import com.google.common.base.Preconditions; +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.Set; /** @@ -89,11 +102,8 @@ // Assigning can be a subset of columns, so this is the projection -- // the batch column numbers. - Category[] targetCategories; - // The data type category of each column being assigned. - - PrimitiveCategory[] targetPrimitiveCategories; - // The data type primitive category of each column being assigned. + TypeInfo[] targetTypeInfos; + // The type information of each column being assigned. 
int[] maxLengths; // For the CHAR and VARCHAR data types, the maximum character length of @@ -103,11 +113,11 @@ * These members have information for data type conversion. * Not defined if there is no conversion. */ - PrimitiveObjectInspector[] convertSourcePrimitiveObjectInspectors; + ObjectInspector[] convertSourceObjectInspectors; // The primitive object inspector of the source data type for any column being // converted. Otherwise, null. - Writable[] convertTargetWritables; + Writable[] flatConvertTargetWritables; // Conversion to the target data type requires a "helper" target writable in a // few cases. @@ -117,8 +127,7 @@ private void allocateArrays(int count) { isConvert = new boolean[count]; projectionColumnNums = new int[count]; - targetCategories = new Category[count]; - targetPrimitiveCategories = new PrimitiveCategory[count]; + targetTypeInfos = new TypeInfo[count]; maxLengths = new int[count]; } @@ -126,8 +135,7 @@ private void allocateArrays(int count) { * Allocate the source conversion related arrays (optional). */ private void allocateConvertArrays(int count) { - convertSourcePrimitiveObjectInspectors = new PrimitiveObjectInspector[count]; - convertTargetWritables = new Writable[count]; + convertSourceObjectInspectors = new ObjectInspector[count]; } /* @@ -137,11 +145,10 @@ private void initTargetEntry(int logicalColumnIndex, int projectionColumnNum, Ty isConvert[logicalColumnIndex] = false; projectionColumnNums[logicalColumnIndex] = projectionColumnNum; Category category = typeInfo.getCategory(); - targetCategories[logicalColumnIndex] = category; + targetTypeInfos[logicalColumnIndex] = typeInfo; if (category == Category.PRIMITIVE) { PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); - targetPrimitiveCategories[logicalColumnIndex] = primitiveCategory; switch (primitiveCategory) { case CHAR: maxLengths[logicalColumnIndex] = ((CharTypeInfo) primitiveTypeInfo).getLength(); break; case VARCHAR: maxLengths[logicalColumnIndex] = ((VarcharTypeInfo) primitiveTypeInfo).getLength(); @@ -162,27 +169,50 @@ private void initTargetEntry(int logicalColumnIndex, int projectionColumnNum, Ty */ private void initConvertSourceEntry(int logicalColumnIndex, TypeInfo convertSourceTypeInfo) { isConvert[logicalColumnIndex] = true; - Category convertSourceCategory = convertSourceTypeInfo.getCategory(); - if (convertSourceCategory == Category.PRIMITIVE) { - PrimitiveTypeInfo convertSourcePrimitiveTypeInfo = (PrimitiveTypeInfo) convertSourceTypeInfo; - convertSourcePrimitiveObjectInspectors[logicalColumnIndex] = - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - convertSourcePrimitiveTypeInfo); - - // These need to be based on the target. - PrimitiveCategory targetPrimitiveCategory = targetPrimitiveCategories[logicalColumnIndex]; - switch (targetPrimitiveCategory) { - case DATE: - convertTargetWritables[logicalColumnIndex] = new DateWritable(); - break; - case STRING: - convertTargetWritables[logicalColumnIndex] = new Text(); - break; - default: - // No additional data type specific setting.
- break; + convertSourceObjectInspectors[logicalColumnIndex] = createConvertSourceObjectInspector(convertSourceTypeInfo); + } + + private ObjectInspector createConvertSourceObjectInspector(TypeInfo typeInfo) { + switch (typeInfo.getCategory()) { + case PRIMITIVE: + return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + (PrimitiveTypeInfo) typeInfo); + case LIST: + return ObjectInspectorFactory.getStandardListObjectInspector( + createConvertSourceObjectInspector(((ListTypeInfo) typeInfo).getListElementTypeInfo())); + case MAP: + { + MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; + ObjectInspector keyObjectInspector = + createConvertSourceObjectInspector(mapTypeInfo.getMapKeyTypeInfo()); + ObjectInspector valueObjectInspector = + createConvertSourceObjectInspector(mapTypeInfo.getMapValueTypeInfo()); + return ObjectInspectorFactory.getStandardMapObjectInspector( + keyObjectInspector, valueObjectInspector); + } + case STRUCT: + { + StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List objectInspectors = new ArrayList(); + for (TypeInfo fieldTypeInfo : structTypeInfo.getAllStructFieldTypeInfos()) { + objectInspectors.add(createConvertSourceObjectInspector(fieldTypeInfo)); + } + return ObjectInspectorFactory.getStandardStructObjectInspector( + structTypeInfo.getAllStructFieldNames(), objectInspectors); + } + case UNION: + { + List unionObjectTypeInfos = + ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos(); + List objectInspectors = + new ArrayList(unionObjectTypeInfos.size()); + for (TypeInfo unionObjectTypeInfo : unionObjectTypeInfos) { + objectInspectors.add(createConvertSourceObjectInspector(unionObjectTypeInfo)); + } + return ObjectInspectorFactory.getStandardUnionObjectInspector(objectInspectors); } } + return null; } /* @@ -322,9 +352,101 @@ public int initConversion(TypeInfo[] sourceTypeInfos, TypeInfo[] targetTypeInfos } } + flatConvertTargetWritables = + allocateWritables(targetTypeInfos, + EnumSet.of(PrimitiveCategory.DATE, PrimitiveCategory.STRING)); return sourceColumnCount; } + static Writable[] allocateWritables(TypeInfo[] typeInfos, Set primitives) { + Writable[] writables = new Writable[countFlatColumns(Arrays.asList(typeInfos))]; + int nextIndex = 0; + for (int i = 0; i < typeInfos.length; i++) { + nextIndex = allocateWritable(writables, typeInfos[i], primitives, nextIndex); + } + return writables; + } + + static Writable[] allocateWritables(TypeInfo[] typeInfos, int[] projectionColumnNums, + Set primitives, VectorizedRowBatch batch) { + + Writable[] writables = new Writable[batch.flatColumns]; + for (int i = 0; i < typeInfos.length; i++) { + allocateWritable(writables, typeInfos[i], primitives, batch.cols[projectionColumnNums[i]].flatIndex); + } + return writables; + } + + private static int countFlatColumns(Iterable typeInfos) { + int sum = 0; + for (TypeInfo typeInfo : typeInfos) { + sum += countFlatColumn(typeInfo); + } + return sum; + } + + private static int countFlatColumn(TypeInfo typeInfo) { + switch (typeInfo.getCategory()) { + case PRIMITIVE: + return 1; + case LIST: + return 1 + countFlatColumn(((ListTypeInfo) typeInfo).getListElementTypeInfo()); + case MAP: + MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; + return 1 + countFlatColumn(mapTypeInfo.getMapKeyTypeInfo()) + countFlatColumn(mapTypeInfo.getMapValueTypeInfo()); + case STRUCT: + return 1 + countFlatColumns(((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos()); + case UNION: + return 1 + countFlatColumns(((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos()); 
+ } + throw new IllegalArgumentException(); + } + + // Depth first search + private static int allocateWritable(Writable[] writables, TypeInfo typeInfo, + Set primitives, int nextIndex) { + switch (typeInfo.getCategory()) { + case PRIMITIVE: + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + if (primitives.contains(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory())) { + writables[nextIndex] = + VectorizedBatchUtil.getPrimitiveWritable(primitiveTypeInfo.getPrimitiveCategory()); + } + return nextIndex + 1; + case LIST: + writables[nextIndex] = null; + nextIndex++; + ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; + return allocateWritable(writables, listTypeInfo.getListElementTypeInfo(), primitives, nextIndex); + case MAP: + writables[nextIndex] = null; + nextIndex++; + MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; + nextIndex = allocateWritable(writables, mapTypeInfo.getMapKeyTypeInfo(), primitives, nextIndex); + nextIndex = allocateWritable(writables, mapTypeInfo.getMapValueTypeInfo(), primitives, nextIndex); + return nextIndex; + case STRUCT: + writables[nextIndex] = null; + nextIndex++; + StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + for (TypeInfo fieldTypeInfo : fieldTypeInfos) { + nextIndex = allocateWritable(writables, fieldTypeInfo, primitives, nextIndex); + } + return nextIndex; + case UNION: + writables[nextIndex] = null; + nextIndex++; + UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; + List objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos(); + for (TypeInfo objectTypeInfo : objectTypeInfos) { + nextIndex = allocateWritable(writables, objectTypeInfo, primitives, nextIndex); + } + return nextIndex; + } + return -1; + } + /** * Assign a row's column object to the ColumnVector at batchIndex in the VectorizedRowBatch. * @@ -335,74 +457,71 @@ public int initConversion(TypeInfo[] sourceTypeInfos, TypeInfo[] targetTypeInfos */ public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex, Object object) { - Category targetCategory = targetCategories[logicalColumnIndex]; - if (targetCategory == null) { + TypeInfo logicalTypeInfo = targetTypeInfos[logicalColumnIndex]; + if (logicalTypeInfo == null) { /* * This is a column that we don't want (i.e. not included) -- we are done. 
*/ return; } final int projectionColumnNum = projectionColumnNums[logicalColumnIndex]; + ColumnVector projectionColumnVector = batch.cols[projectionColumnNum]; + assignRowColumn(batchIndex, logicalTypeInfo, projectionColumnVector, object); + } + + public void assignRowColumn(int batchIndex, TypeInfo logicalTypeInfo, + ColumnVector colVector, Object object) { if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); return; } + Category targetCategory = logicalTypeInfo.getCategory(); switch (targetCategory) { case PRIMITIVE: { - PrimitiveCategory targetPrimitiveCategory = targetPrimitiveCategories[logicalColumnIndex]; + PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) logicalTypeInfo).getPrimitiveCategory(); switch (targetPrimitiveCategory) { case VOID: - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex); return; case BOOLEAN: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - (((BooleanWritable) object).get() ? 1 : 0); + ((LongColumnVector) colVector).vector[batchIndex] = (((BooleanWritable) object).get() ? 1 : 0); break; case BYTE: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - ((ByteWritable) object).get(); + ((LongColumnVector) colVector).vector[batchIndex] = ((ByteWritable) object).get(); break; case SHORT: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - ((ShortWritable) object).get(); + ((LongColumnVector) colVector).vector[batchIndex] = ((ShortWritable) object).get(); break; case INT: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - ((IntWritable) object).get(); + ((LongColumnVector) colVector).vector[batchIndex] = ((IntWritable) object).get(); break; case LONG: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - ((LongWritable) object).get(); + ((LongColumnVector) colVector).vector[batchIndex] = ((LongWritable) object).get(); break; case TIMESTAMP: - ((TimestampColumnVector) batch.cols[projectionColumnNum]).set( - batchIndex, ((TimestampWritable) object).getTimestamp()); + ((TimestampColumnVector) colVector).set(batchIndex, ((TimestampWritable) object).getTimestamp()); break; case DATE: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - ((DateWritable) object).getDays(); + ((LongColumnVector) colVector).vector[batchIndex] = ((DateWritable) object).getDays(); break; case FLOAT: - ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - ((FloatWritable) object).get(); + ((DoubleColumnVector) colVector).vector[batchIndex] = ((FloatWritable) object).get(); break; case DOUBLE: - ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - ((DoubleWritable) object).get(); + ((DoubleColumnVector) colVector).vector[batchIndex] = ((DoubleWritable) object).get(); break; case BINARY: { BytesWritable bw = (BytesWritable) object; - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + ((BytesColumnVector) colVector).setVal( batchIndex, bw.getBytes(), 0, bw.getLength()); } break; case STRING: { Text tw = (Text) object; - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( - batchIndex, tw.getBytes(), 0, tw.getLength()); + ((BytesColumnVector) colVector).setVal(batchIndex, tw.getBytes(), 0, tw.getLength()); } break; case VARCHAR: @@ 
-420,8 +539,7 @@ public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int logica // TODO: HIVE-13624 Do we need maxLength checking? byte[] bytes = hiveVarchar.getValue().getBytes(); - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( - batchIndex, bytes, 0, bytes.length); + ((BytesColumnVector) colVector).setVal(batchIndex, bytes, 0, bytes.length); } break; case CHAR: @@ -440,33 +558,102 @@ public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int logica // We store CHAR in vector row batch with padding stripped. byte[] bytes = hiveChar.getStrippedValue().getBytes(); - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( - batchIndex, bytes, 0, bytes.length); + ((BytesColumnVector) colVector).setVal(batchIndex, bytes, 0, bytes.length); } break; case DECIMAL: if (object instanceof HiveDecimal) { - ((DecimalColumnVector) batch.cols[projectionColumnNum]).set( - batchIndex, (HiveDecimal) object); + ((DecimalColumnVector) colVector).set(batchIndex, (HiveDecimal) object); } else { - ((DecimalColumnVector) batch.cols[projectionColumnNum]).set( - batchIndex, (HiveDecimalWritable) object); + ((DecimalColumnVector) colVector).set(batchIndex, (HiveDecimalWritable) object); } break; case INTERVAL_YEAR_MONTH: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) colVector).vector[batchIndex] = ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth().getTotalMonths(); break; case INTERVAL_DAY_TIME: - ((IntervalDayTimeColumnVector) batch.cols[projectionColumnNum]).set( + ((IntervalDayTimeColumnVector) colVector).set( batchIndex, ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime()); break; default: throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() + " not supported"); + } + break; + } + case LIST: + { + ListColumnVector listColumnVector = (ListColumnVector) colVector; + ColumnVector childColumnVector = listColumnVector.child; + TypeInfo elementTypeInfo = ((ListTypeInfo) logicalTypeInfo).getListElementTypeInfo(); + + List list = (List) object; + int size = list.size(); + int offset = listColumnVector.childCount; + listColumnVector.offsets[batchIndex] = offset; + listColumnVector.lengths[batchIndex] = size; + listColumnVector.childCount += size; + childColumnVector.ensureSize(offset + size, true); + + for (int i = 0; i < size; i++) { + assignRowColumn(offset + i, elementTypeInfo, childColumnVector, list.get(i)); + } + } + break; + case MAP: + { + MapColumnVector mapColumnVector = (MapColumnVector) colVector; + ColumnVector keyColumnVector = mapColumnVector.keys; + ColumnVector valueColumnVector = mapColumnVector.values; + MapTypeInfo mapTypeInfo = (MapTypeInfo) logicalTypeInfo; + TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo(); + TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo(); + + Map map = (Map) object; + List entries = new ArrayList(map.entrySet()); + int size = map.size(); + int offset = mapColumnVector.childCount; + mapColumnVector.offsets[batchIndex] = offset; + mapColumnVector.lengths[batchIndex] = size; + mapColumnVector.childCount += size; + keyColumnVector.ensureSize(offset + size, true); + valueColumnVector.ensureSize(offset + size, true); + + for (int i = 0; i < size; i++) { + Map.Entry entry = entries.get(i); + assignRowColumn(offset + i, keyTypeInfo, keyColumnVector, entry.getKey()); + assignRowColumn(offset + i, valueTypeInfo, valueColumnVector, entry.getValue()); } } break; + case STRUCT: + { + StructColumnVector 
structColumnVector = (StructColumnVector) colVector; + ColumnVector[] fieldColumnVectors = structColumnVector.fields; + StructTypeInfo structTypeInfo = (StructTypeInfo) logicalTypeInfo; + List typeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + + List list = (List) object; + int size = list.size(); + + for (int i = 0; i < size; i++) { + assignRowColumn(batchIndex, typeInfos.get(i), fieldColumnVectors[i], list.get(i)); + } + } + break; + case UNION: + { + UnionColumnVector unionColumnVector = (UnionColumnVector) colVector; + ColumnVector[] fieldColumnVectors = unionColumnVector.fields; + StandardUnionObjectInspector.StandardUnion union = (StandardUnionObjectInspector.StandardUnion) object; + byte tag = union.getTag(); + UnionTypeInfo unionTypeInfo = (UnionTypeInfo) logicalTypeInfo; + unionColumnVector.tags[batchIndex] = tag; + assignRowColumn(batchIndex, unionTypeInfo.getAllUnionObjectTypeInfos().get(tag), + fieldColumnVectors[tag], union.getObject()); + } + break; default: throw new RuntimeException("Category " + targetCategory.name() + " not supported"); } @@ -474,7 +661,7 @@ public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int logica /* * We always set the null flag to false when there is a value. */ - batch.cols[projectionColumnNum].isNull[batchIndex] = false; + colVector.isNull[batchIndex] = false; } /** @@ -493,112 +680,113 @@ public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int logica public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex, Object object) { Preconditions.checkState(isConvert[logicalColumnIndex]); - Category targetCategory = targetCategories[logicalColumnIndex]; + + final int projectionColumnNum = projectionColumnNums[logicalColumnIndex]; + ColumnVector projectionColumnVector = batch.cols[projectionColumnNum]; + ObjectInspector logicalObjectInspector = convertSourceObjectInspectors[logicalColumnIndex]; + + TypeInfo typeInfo = targetTypeInfos[logicalColumnIndex]; + Category targetCategory = typeInfo.getCategory(); if (targetCategory == null) { /* * This is a column that we don't want (i.e. not included) -- we are done. */ return; } - final int projectionColumnNum = projectionColumnNums[logicalColumnIndex]; if (object == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(projectionColumnVector, batchIndex); return; } + assignConvertRowColumn(batchIndex, typeInfo, logicalObjectInspector, projectionColumnVector, object); + } + + public void assignConvertRowColumn(int batchIndex, TypeInfo typeInfo, + ObjectInspector objectInspector, ColumnVector columnVector, Object object) { + Category targetCategory = typeInfo.getCategory(); try { switch (targetCategory) { case PRIMITIVE: - PrimitiveCategory targetPrimitiveCategory = targetPrimitiveCategories[logicalColumnIndex]; + PrimitiveCategory targetPrimitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(); + PrimitiveObjectInspector primitiveLogicalOI = (PrimitiveObjectInspector) objectInspector; switch (targetPrimitiveCategory) { case VOID: - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; case BOOLEAN: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - (PrimitiveObjectInspectorUtils.getBoolean( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]) ? 
1 : 0); + ((LongColumnVector) columnVector).vector[batchIndex] = + (PrimitiveObjectInspectorUtils.getBoolean(object, primitiveLogicalOI) ? 1 : 0); break; case BYTE: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - PrimitiveObjectInspectorUtils.getByte( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + ((LongColumnVector) columnVector).vector[batchIndex] = + PrimitiveObjectInspectorUtils.getByte(object, primitiveLogicalOI); break; case SHORT: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - PrimitiveObjectInspectorUtils.getShort( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + ((LongColumnVector) columnVector).vector[batchIndex] = + PrimitiveObjectInspectorUtils.getShort(object, primitiveLogicalOI); break; case INT: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - PrimitiveObjectInspectorUtils.getInt( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + ((LongColumnVector) columnVector).vector[batchIndex] = + PrimitiveObjectInspectorUtils.getInt(object, primitiveLogicalOI); break; case LONG: - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - PrimitiveObjectInspectorUtils.getLong( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + ((LongColumnVector) columnVector).vector[batchIndex] = + PrimitiveObjectInspectorUtils.getLong(object, primitiveLogicalOI); break; case TIMESTAMP: { Timestamp timestamp = - PrimitiveObjectInspectorUtils.getTimestamp( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + PrimitiveObjectInspectorUtils.getTimestamp(object, primitiveLogicalOI); if (timestamp == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((TimestampColumnVector) batch.cols[projectionColumnNum]).set( - batchIndex, timestamp); + ((TimestampColumnVector) columnVector).set(batchIndex, timestamp); } break; case DATE: { - Date date = PrimitiveObjectInspectorUtils.getDate( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + Date date = PrimitiveObjectInspectorUtils.getDate(object, primitiveLogicalOI); if (date == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - DateWritable dateWritable = (DateWritable) convertTargetWritables[logicalColumnIndex]; + DateWritable dateWritable = (DateWritable) + flatConvertTargetWritables[columnVector.flatIndex]; dateWritable.set(date); - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = dateWritable.getDays(); } break; case FLOAT: - ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - PrimitiveObjectInspectorUtils.getFloat( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + ((DoubleColumnVector) columnVector).vector[batchIndex] = + PrimitiveObjectInspectorUtils.getFloat(object, primitiveLogicalOI); break; case DOUBLE: - ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = - PrimitiveObjectInspectorUtils.getDouble( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + ((DoubleColumnVector) columnVector).vector[batchIndex] = + PrimitiveObjectInspectorUtils.getDouble(object, primitiveLogicalOI); break; case 
BINARY: { BytesWritable bytesWritable = - PrimitiveObjectInspectorUtils.getBinary( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + PrimitiveObjectInspectorUtils.getBinary(object, primitiveLogicalOI); if (bytesWritable == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + ((BytesColumnVector) columnVector).setVal( batchIndex, bytesWritable.getBytes(), 0, bytesWritable.getLength()); } break; case STRING: { - String string = PrimitiveObjectInspectorUtils.getString( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + String string = PrimitiveObjectInspectorUtils.getString(object, primitiveLogicalOI); if (string == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - Text text = (Text) convertTargetWritables[logicalColumnIndex]; + Text text = (Text) flatConvertTargetWritables[columnVector.flatIndex]; text.set(string); - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + ((BytesColumnVector) columnVector).setVal( batchIndex, text.getBytes(), 0, text.getLength()); } break; @@ -607,18 +795,16 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, // UNDONE: Performance problem with conversion to String, then bytes... HiveVarchar hiveVarchar = - PrimitiveObjectInspectorUtils.getHiveVarchar( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + PrimitiveObjectInspectorUtils.getHiveVarchar(object, primitiveLogicalOI); if (hiveVarchar == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } // TODO: Do we need maxLength checking? byte[] bytes = hiveVarchar.getValue().getBytes(); - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( - batchIndex, bytes, 0, bytes.length); + ((BytesColumnVector) columnVector).setVal(batchIndex, bytes, 0, bytes.length); } break; case CHAR: @@ -626,10 +812,9 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, // UNDONE: Performance problem with conversion to String, then bytes... HiveChar hiveChar = - PrimitiveObjectInspectorUtils.getHiveChar( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + PrimitiveObjectInspectorUtils.getHiveChar(object, primitiveLogicalOI); if (hiveChar == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } // We store CHAR in vector row batch with padding stripped. @@ -637,47 +822,42 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, // TODO: Do we need maxLength checking? 
byte[] bytes = hiveChar.getStrippedValue().getBytes(); - ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal( + ((BytesColumnVector) columnVector).setVal( batchIndex, bytes, 0, bytes.length); } break; case DECIMAL: { HiveDecimal hiveDecimal = - PrimitiveObjectInspectorUtils.getHiveDecimal( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + PrimitiveObjectInspectorUtils.getHiveDecimal(object, primitiveLogicalOI); if (hiveDecimal == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((DecimalColumnVector) batch.cols[projectionColumnNum]).set( - batchIndex, hiveDecimal); + ((DecimalColumnVector) columnVector).set(batchIndex, hiveDecimal); } break; case INTERVAL_YEAR_MONTH: { HiveIntervalYearMonth intervalYearMonth = - PrimitiveObjectInspectorUtils.getHiveIntervalYearMonth( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + PrimitiveObjectInspectorUtils.getHiveIntervalYearMonth(object, primitiveLogicalOI); if (intervalYearMonth == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] = + ((LongColumnVector) columnVector).vector[batchIndex] = intervalYearMonth.getTotalMonths(); } break; case INTERVAL_DAY_TIME: { HiveIntervalDayTime intervalDayTime = - PrimitiveObjectInspectorUtils.getHiveIntervalDayTime( - object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]); + PrimitiveObjectInspectorUtils.getHiveIntervalDayTime(object, primitiveLogicalOI); if (intervalDayTime == null) { - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } - ((IntervalDayTimeColumnVector) batch.cols[projectionColumnNum]).set( - batchIndex, intervalDayTime); + ((IntervalDayTimeColumnVector) columnVector).set(batchIndex, intervalDayTime); } break; default: @@ -685,18 +865,101 @@ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex, " not supported"); } break; + case LIST: + { + ListColumnVector listColumnVector = (ListColumnVector) columnVector; + ColumnVector childColumnVector = listColumnVector.child; + TypeInfo elementTypeInfo = ((ListTypeInfo) typeInfo).getListElementTypeInfo(); + ListObjectInspector listOI = (ListObjectInspector) objectInspector; + ObjectInspector elementOI = listOI.getListElementObjectInspector(); + + int size = listOI.getListLength(object); + int offset = listColumnVector.childCount; + listColumnVector.offsets[batchIndex] = offset; + listColumnVector.lengths[batchIndex] = size; + listColumnVector.childCount += size; + childColumnVector.ensureSize(offset + size, true); + + for (int i = 0; i < size; i++) { + assignConvertRowColumn( + offset + i, elementTypeInfo, elementOI, childColumnVector, + listOI.getListElement(object, i)); + } + } + break; + case MAP: + { + MapColumnVector mapColumnVector = (MapColumnVector) columnVector; + ColumnVector keyColumnVector = mapColumnVector.keys; + ColumnVector valueColumnVector = mapColumnVector.values; + MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; + TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo(); + TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo(); + MapObjectInspector mapOI = (MapObjectInspector) objectInspector; + 
ObjectInspector keyOI = mapOI.getMapKeyObjectInspector(); + ObjectInspector valueOI = mapOI.getMapValueObjectInspector(); + + Map map = mapOI.getMap(object); + List entries = new ArrayList(map.entrySet()); + int size = map.size(); + int offset = mapColumnVector.childCount; + mapColumnVector.offsets[batchIndex] = offset; + mapColumnVector.lengths[batchIndex] = size; + mapColumnVector.childCount += size; + keyColumnVector.ensureSize(offset + size, true); + valueColumnVector.ensureSize(offset + size, true); + + for (int i = 0; i < size; i++) { + Map.Entry entry = entries.get(i); + assignConvertRowColumn( + offset + i, keyTypeInfo, keyOI, keyColumnVector, entry.getKey()); + assignConvertRowColumn( + offset + i, valueTypeInfo, valueOI, valueColumnVector, entry.getValue()); + } + } + break; + case STRUCT: + { + StructColumnVector structColumnVector = (StructColumnVector) columnVector; + ColumnVector[] fieldColumnVectors = structColumnVector.fields; + StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List typeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + StructObjectInspector structOI = (StructObjectInspector) objectInspector; + List fields = structOI.getAllStructFieldRefs(); + int size = fields.size(); + + for (int i = 0; i < size; i++) { + StructField field = fields.get(i); + ObjectInspector fieldOI = field.getFieldObjectInspector(); + assignConvertRowColumn(batchIndex, typeInfos.get(i), fieldOI, + fieldColumnVectors[i], structOI.getStructFieldData(object, field)); + } + } + break; + case UNION: + { + UnionColumnVector unionColumnVector = (UnionColumnVector) columnVector; + ColumnVector[] fieldColumnVectors = unionColumnVector.fields; + UnionObjectInspector unionOI = (UnionObjectInspector) objectInspector; + byte tag = unionOI.getTag(object); + ObjectInspector elementOI = unionOI.getObjectInspectors().get(tag); + UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; + assignConvertRowColumn(batchIndex, unionTypeInfo, elementOI, + fieldColumnVectors[tag], unionOI.getField(object)); + } + break; default: throw new RuntimeException("Category " + targetCategory.name() + " not supported"); } } catch (NumberFormatException e) { // Some of the conversion methods throw this exception on numeric parsing errors. - VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex); + VectorizedBatchUtil.setNullColIsNullValue(columnVector, batchIndex); return; } // We always set the null flag to false when there is a value. 
- batch.cols[projectionColumnNum].isNull[batchIndex] = false; + columnVector.isNull[batchIndex] = false; } /* diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java index defaf9082f..f32e8e2962 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java @@ -18,10 +18,18 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.EnumSet; import java.util.List; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ArrayMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SettableUnionObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.ByteWritable; @@ -51,9 +59,13 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import com.google.common.base.Charsets; +import static org.apache.hadoop.hive.ql.exec.vector.VectorAssignRow.allocateWritables; + /** * This class extracts specified VectorizedRowBatch row columns into writables. * @@ -73,28 +85,21 @@ // Extraction can be a subset of columns, so this is the projection -- // the batch column numbers. - Category[] categories; - // The data type category of each column being extracted. - - PrimitiveCategory[] primitiveCategories; - // The data type primitive category of each column being assigned. + TypeInfo[] typeInfos; + ObjectInspector[] objectInspectors; - int[] maxLengths; - // For the CHAR and VARCHAR data types, the maximum character length of - // the columns. Otherwise, 0. - - Writable[] primitiveWritables; + Writable[] flatWritables; // The extracted values will be placed in these writables. + boolean firstBatch = true; + /* * Allocate the various arrays. 
*/ private void allocateArrays(int count) { projectionColumnNums = new int[count]; - categories = new Category[count]; - primitiveCategories = new PrimitiveCategory[count]; - maxLengths = new int[count]; - primitiveWritables = new Writable[count]; + typeInfos = new TypeInfo[count]; + objectInspectors = new ObjectInspector[count]; } /* @@ -102,28 +107,8 @@ private void allocateArrays(int count) { */ private void initEntry(int logicalColumnIndex, int projectionColumnNum, TypeInfo typeInfo) { projectionColumnNums[logicalColumnIndex] = projectionColumnNum; - Category category = typeInfo.getCategory(); - categories[logicalColumnIndex] = category; - if (category == Category.PRIMITIVE) { - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; - PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); - primitiveCategories[logicalColumnIndex] = primitiveCategory; - - switch (primitiveCategory) { - case CHAR: - maxLengths[logicalColumnIndex] = ((CharTypeInfo) primitiveTypeInfo).getLength(); - break; - case VARCHAR: - maxLengths[logicalColumnIndex] = ((VarcharTypeInfo) primitiveTypeInfo).getLength(); - break; - default: - // No additional data type specific setting. - break; - } - - primitiveWritables[logicalColumnIndex] = - VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory); - } + typeInfos[logicalColumnIndex] = typeInfo; + objectInspectors[logicalColumnIndex] = TypeInfoUtils.getArrayWritableObjectInspectorFromTypeInfo(typeInfo); } /* @@ -147,6 +132,7 @@ public void init(StructObjectInspector structObjectInspector, List proj initEntry(i, projectionColumnNum, typeInfo); } + firstBatch = true; } /* @@ -166,6 +152,7 @@ public void init(TypeInfo[] typeInfos, int[] projectedColumns) initEntry(i, projectionColumnNum, typeInfo); } + firstBatch = true; } /* @@ -184,6 +171,7 @@ public void init(List typeNames) throws HiveException { initEntry(i, i, typeInfo); } + firstBatch = true; } public int getCount() { @@ -199,6 +187,12 @@ public int getCount() { * @return */ public Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex) { + if (firstBatch) { + flatWritables = + allocateWritables(typeInfos, projectionColumnNums, EnumSet.allOf(PrimitiveCategory.class), batch); + firstBatch = false; + } + final int projectionColumnNum = projectionColumnNums[logicalColumnIndex]; ColumnVector colVector = batch.cols[projectionColumnNum]; if (colVector == null) { @@ -206,62 +200,75 @@ public Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int log // may ask for them.. return null; } + TypeInfo typeInfo = typeInfos[logicalColumnIndex]; + ObjectInspector objectInspector = objectInspectors[logicalColumnIndex]; + return extractRowColumn(colVector, batchIndex, + typeInfo, objectInspector, projectionColumnNum, true); + } + + private Object extractRowColumn(ColumnVector colVector, int batchIndex, + TypeInfo typeInfo, ObjectInspector objectInspector, + int projectionColumnNum, boolean reuseWritable) { int adjustedIndex = (colVector.isRepeating ? 
0 : batchIndex); if (!colVector.noNulls && colVector.isNull[adjustedIndex]) { return null; } - Category category = categories[logicalColumnIndex]; + Category category = typeInfo.getCategory(); switch (category) { case PRIMITIVE: { - Writable primitiveWritable = - primitiveWritables[logicalColumnIndex]; - PrimitiveCategory primitiveCategory = primitiveCategories[logicalColumnIndex]; + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + Writable primitiveWritable; + if (reuseWritable) { + primitiveWritable = flatWritables[colVector.flatIndex]; + } else { + primitiveWritable = VectorizedBatchUtil.getPrimitiveWritable(primitiveCategory); + } switch (primitiveCategory) { case VOID: return null; case BOOLEAN: ((BooleanWritable) primitiveWritable).set( - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex] == 0 ? + ((LongColumnVector) colVector).vector[adjustedIndex] == 0 ? false : true); return primitiveWritable; case BYTE: ((ByteWritable) primitiveWritable).set( - (byte) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + (byte) ((LongColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case SHORT: ((ShortWritable) primitiveWritable).set( - (short) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + (short) ((LongColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case INT: ((IntWritable) primitiveWritable).set( - (int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + (int) ((LongColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case LONG: ((LongWritable) primitiveWritable).set( - ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + ((LongColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case TIMESTAMP: ((TimestampWritable) primitiveWritable).set( - ((TimestampColumnVector) batch.cols[projectionColumnNum]).asScratchTimestamp(adjustedIndex)); + ((TimestampColumnVector) colVector).asScratchTimestamp(adjustedIndex)); return primitiveWritable; case DATE: ((DateWritable) primitiveWritable).set( - (int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + (int) ((LongColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case FLOAT: ((FloatWritable) primitiveWritable).set( - (float) ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + (float) ((DoubleColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case DOUBLE: ((DoubleWritable) primitiveWritable).set( - ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + ((DoubleColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case BINARY: { - BytesColumnVector bytesColVector = - ((BytesColumnVector) batch.cols[projectionColumnNum]); + BytesColumnVector bytesColVector = (BytesColumnVector) colVector; byte[] bytes = bytesColVector.vector[adjustedIndex]; int start = bytesColVector.start[adjustedIndex]; int length = bytesColVector.length[adjustedIndex]; @@ -276,8 +283,7 @@ public Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int log } case STRING: { - BytesColumnVector bytesColVector = - ((BytesColumnVector) batch.cols[projectionColumnNum]); + BytesColumnVector bytesColVector = (BytesColumnVector) colVector; byte[] bytes = bytesColVector.vector[adjustedIndex]; int start = 
bytesColVector.start[adjustedIndex]; int length = bytesColVector.length[adjustedIndex]; @@ -292,8 +298,7 @@ public Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int log } case VARCHAR: { - BytesColumnVector bytesColVector = - ((BytesColumnVector) batch.cols[projectionColumnNum]); + BytesColumnVector bytesColVector = (BytesColumnVector) colVector; byte[] bytes = bytesColVector.vector[adjustedIndex]; int start = bytesColVector.start[adjustedIndex]; int length = bytesColVector.length[adjustedIndex]; @@ -303,7 +308,7 @@ public Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int log } int adjustedLength = StringExpr.truncate(bytes, start, length, - maxLengths[logicalColumnIndex]); + ((VarcharTypeInfo) typeInfo).getLength()); HiveVarcharWritable hiveVarcharWritable = (HiveVarcharWritable) primitiveWritable; hiveVarcharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), -1); @@ -311,8 +316,7 @@ public Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int log } case CHAR: { - BytesColumnVector bytesColVector = - ((BytesColumnVector) batch.cols[projectionColumnNum]); + BytesColumnVector bytesColVector = (BytesColumnVector) colVector; byte[] bytes = bytesColVector.vector[adjustedIndex]; int start = bytesColVector.start[adjustedIndex]; int length = bytesColVector.length[adjustedIndex]; @@ -321,32 +325,130 @@ public Object extractRowColumn(VectorizedRowBatch batch, int batchIndex, int log nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum); } - int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, - maxLengths[logicalColumnIndex]); + int maxLength = ((CharTypeInfo) typeInfo).getLength(); + int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length, maxLength); HiveCharWritable hiveCharWritable = (HiveCharWritable) primitiveWritable; - hiveCharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), - maxLengths[logicalColumnIndex]); + hiveCharWritable.set(new String(bytes, start, adjustedLength, Charsets.UTF_8), maxLength); return primitiveWritable; } case DECIMAL: // The HiveDecimalWritable set method will quickly copy the deserialized decimal writable fields. 
((HiveDecimalWritable) primitiveWritable).set( - ((DecimalColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + ((DecimalColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case INTERVAL_YEAR_MONTH: ((HiveIntervalYearMonthWritable) primitiveWritable).set( - (int) ((LongColumnVector) batch.cols[projectionColumnNum]).vector[adjustedIndex]); + (int) ((LongColumnVector) colVector).vector[adjustedIndex]); return primitiveWritable; case INTERVAL_DAY_TIME: ((HiveIntervalDayTimeWritable) primitiveWritable).set( - ((IntervalDayTimeColumnVector) batch.cols[projectionColumnNum]).asScratchIntervalDayTime(adjustedIndex)); + ((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedIndex)); return primitiveWritable; default: throw new RuntimeException("Primitive category " + primitiveCategory.name() + " not supported"); } } + case LIST: + { + ListColumnVector listColVector = (ListColumnVector) colVector; + ColumnVector childColVector = listColVector.child; + + int offset = (int) listColVector.offsets[adjustedIndex]; + int length = (int) listColVector.lengths[adjustedIndex]; + + ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; + TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo(); + SettableListObjectInspector listOI = (SettableListObjectInspector) objectInspector; + ObjectInspector elementOI = listOI.getListElementObjectInspector(); + Object list = listOI.create(length); + + for (int i = 0; i < length; i++) { + Object object = extractRowColumn(childColVector, offset + i, + elementTypeInfo, elementOI, projectionColumnNum, false); + listOI.set(list, i, object); + } + return list; + } + case MAP: + { + MapColumnVector mapColVector = (MapColumnVector) colVector; + ColumnVector keyColVector = mapColVector.keys; + ColumnVector valueColVector = mapColVector.values; + int offset = (int) mapColVector.offsets[adjustedIndex]; + int length = (int) mapColVector.lengths[adjustedIndex]; + + MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; + TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo(); + TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo(); + SettableMapObjectInspector mapOI = (SettableMapObjectInspector) objectInspector; + ObjectInspector keyOI = mapOI.getMapKeyObjectInspector(); + ObjectInspector valueOI = mapOI.getMapValueObjectInspector(); + + if (mapOI instanceof ArrayMapObjectInspector) { + ArrayMapObjectInspector arrayMapOI = (ArrayMapObjectInspector) mapOI; + Object map = arrayMapOI.create(length); + + for (int i = 0; i < length; i++) { + Object key = extractRowColumn(keyColVector, offset + i, + keyTypeInfo, keyOI, projectionColumnNum, false); + Object value = extractRowColumn(valueColVector, offset + i, + valueTypeInfo, valueOI, projectionColumnNum, false); + arrayMapOI.put(map, i, key, value); + } + return map; + } else { + Object map = mapOI.create(); + + for (int i = 0; i < length; i++) { + Object key = extractRowColumn(keyColVector, offset + i, + keyTypeInfo, keyOI, projectionColumnNum, false); + Object value = extractRowColumn(valueColVector, offset + i, + valueTypeInfo, valueOI, projectionColumnNum, false); + mapOI.put(map, key, value); + } + return map; + } + } + case STRUCT: + { + StructColumnVector structColVector = (StructColumnVector) colVector; + ColumnVector[] fieldColVectors = structColVector.fields; + int length = fieldColVectors.length; + + StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + SettableStructObjectInspector 
structOI = (SettableStructObjectInspector) objectInspector; + Object struct = structOI.create(); + List fields = structOI.getAllStructFieldRefs(); + + for (int i = 0; i < length; i++) { + TypeInfo fieldTypeInfo = fieldTypeInfos.get(i); + ObjectInspector fieldOI = fields.get(i).getFieldObjectInspector(); + Object fieldObject = extractRowColumn(fieldColVectors[i], adjustedIndex, + fieldTypeInfo, fieldOI, projectionColumnNum, reuseWritable); + structOI.setStructFieldData(struct, fields.get(i), fieldObject); + } + return struct; + } + case UNION: + { + UnionColumnVector unionColVector = (UnionColumnVector) colVector; + ColumnVector[] fieldColVectors = unionColVector.fields; + byte tag = (byte) unionColVector.tags[adjustedIndex]; + + UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; + TypeInfo objectTypeInfo = unionTypeInfo.getAllUnionObjectTypeInfos().get(tag); + SettableUnionObjectInspector unionOI = (SettableUnionObjectInspector) objectInspector; + ObjectInspector fieldOI = unionOI.getObjectInspectors().get(tag); + Object union = unionOI.create(); + + Object object = extractRowColumn(fieldColVectors[tag], adjustedIndex, + objectTypeInfo, fieldOI, projectionColumnNum, reuseWritable); + unionOI.set(union, tag, object); + return union; + } default: throw new RuntimeException("Category " + category.name() + " not supported"); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index e546a658c7..306fcfb184 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -20,9 +20,7 @@ import java.io.IOException; import java.sql.Date; import java.sql.Timestamp; -import java.util.Arrays; import java.util.LinkedHashMap; -import java.util.List; import java.util.Map; import org.apache.hadoop.hive.common.type.HiveChar; @@ -38,11 +36,7 @@ import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils; import org.apache.hadoop.hive.ql.io.IOPrepareCache; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.Explain; import org.apache.hadoop.hive.ql.plan.PartitionDesc; -import org.apache.hadoop.hive.ql.plan.Explain.Level; -import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; @@ -52,7 +46,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.mapred.FileSplit; -import org.apache.hive.common.util.DateUtils; import com.google.common.base.Preconditions; @@ -79,6 +72,7 @@ private int partitionColumnCount; private String[] scratchColumnTypeNames; + private int nextIndex; /** * Constructor for VectorizedRowBatchCtx @@ -201,12 +195,13 @@ public VectorizedRowBatch createVectorizedRowBatch() final int dataAndPartColumnCount = rowColumnTypeInfos.length; final int totalColumnCount = dataAndPartColumnCount + scratchColumnTypeNames.length; VectorizedRowBatch result = new VectorizedRowBatch(totalColumnCount); + nextIndex = 0; if (dataColumnNums == null) { // All data and partition columns. 
for (int i = 0; i < dataAndPartColumnCount; i++) { TypeInfo typeInfo = rowColumnTypeInfos[i]; - result.cols[i] = VectorizedBatchUtil.createColumnVector(typeInfo); + result.cols[i] = assignFlattenIndex(VectorizedBatchUtil.createColumnVector(typeInfo)); } } else { // Create only needed/included columns data columns. @@ -214,21 +209,22 @@ public VectorizedRowBatch createVectorizedRowBatch() int columnNum = dataColumnNums[i]; Preconditions.checkState(columnNum < dataAndPartColumnCount); TypeInfo typeInfo = rowColumnTypeInfos[columnNum]; - result.cols[columnNum] = VectorizedBatchUtil.createColumnVector(typeInfo); + result.cols[columnNum] = assignFlattenIndex(VectorizedBatchUtil.createColumnVector(typeInfo)); } // Always create partition columns. final int endColumnNum = dataColumnCount + partitionColumnCount; for (int partitionColumnNum = dataColumnCount; partitionColumnNum < endColumnNum; partitionColumnNum++) { TypeInfo typeInfo = rowColumnTypeInfos[partitionColumnNum]; - result.cols[partitionColumnNum] = VectorizedBatchUtil.createColumnVector(typeInfo); + result.cols[partitionColumnNum] = assignFlattenIndex(VectorizedBatchUtil.createColumnVector(typeInfo)); } } for (int i = 0; i < scratchColumnTypeNames.length; i++) { String typeName = scratchColumnTypeNames[i]; result.cols[rowColumnTypeInfos.length + i] = - VectorizedBatchUtil.createColumnVector(typeName); + assignFlattenIndex(VectorizedBatchUtil.createColumnVector(typeName)); } + result.flatColumns = nextIndex; result.setPartitionInfo(dataColumnCount, partitionColumnCount); @@ -236,6 +232,54 @@ public VectorizedRowBatch createVectorizedRowBatch() return result; } + private ColumnVector assignFlattenIndex(ColumnVector columnVector) { + if (columnVector instanceof ListColumnVector) { + columnVector.flatIndex = nextIndex; + nextIndex++; + ColumnVector childColumnVector = ((ListColumnVector) columnVector).child; + assignFlattenIndex(childColumnVector); + return columnVector; + } + + if (columnVector instanceof MapColumnVector) { + columnVector.flatIndex = nextIndex; + nextIndex++; + ColumnVector keyColumnVector = ((MapColumnVector) columnVector).keys; + ColumnVector valueColumnVector = ((MapColumnVector) columnVector).values; + assignFlattenIndex(keyColumnVector); + assignFlattenIndex(valueColumnVector); + return columnVector; + } + + if (columnVector instanceof StructColumnVector) { + columnVector.flatIndex = nextIndex; + nextIndex++; + ColumnVector[] fieldColumnVectors = ((StructColumnVector) columnVector).fields; + int structSize = fieldColumnVectors.length; + for (int i = 0; i < structSize; i++) { + assignFlattenIndex(fieldColumnVectors[i]); + } + return columnVector; + } + + if (columnVector instanceof UnionColumnVector) { + columnVector.flatIndex = nextIndex; + nextIndex++; + ColumnVector[] fieldColumnVectors = ((UnionColumnVector) columnVector).fields; + int unionSize = fieldColumnVectors.length; + for (int i = 0; i < unionSize; i++) { + assignFlattenIndex(fieldColumnVectors[i]); + } + return columnVector; + } + + // Primitive + columnVector.flatIndex = nextIndex; + nextIndex++; + return columnVector; + } + + /** * Add the partition values to the batch * diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java index e9ce8e8d6e..18a5e86b20 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java @@ -19,11 +19,25 @@ package 
org.apache.hadoop.hive.ql.exec.vector; import java.util.Arrays; +import java.util.List; import java.util.Random; import org.apache.hadoop.hive.ql.metadata.HiveException; import junit.framework.TestCase; +import org.apache.hadoop.hive.serde2.objectinspector.ArrayMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; /** * Unit test for the vectorized conversion to and from row object[]. @@ -39,19 +53,142 @@ void examineBatch(VectorizedRowBatch batch, VectorExtractRow vectorExtractRow, vectorExtractRow.extractRow(batch, i, row); Object[] expectedRow = randomRows[firstRandomRowIndex + i]; for (int c = 0; c < rowSize; c++) { - if (!row[c].equals(expectedRow[c])) { + Object actual = row[c]; + Object expected = expectedRow[c]; + TypeInfo typeInfo = vectorExtractRow.typeInfos[c]; + ObjectInspector actualObjectInspector = + TypeInfoUtils.getArrayWritableObjectInspectorFromTypeInfo(typeInfo); + ObjectInspector expectedObjectInspector = + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo); + if (!equals(actual, actualObjectInspector, expected, expectedObjectInspector, typeInfo)) { fail("Row " + (firstRandomRowIndex + i) + " and column " + c + " mismatch"); } } } } + private boolean equals( + Object actual, ObjectInspector actualObjectInspector, + Object expected, ObjectInspector expectedObjectInspector, TypeInfo typeInfo) { + if (actual == null) { + return expected == null; + } else { + switch (typeInfo.getCategory()) { + case PRIMITIVE: + { + return actual.equals(expected); + } + case LIST: + { + ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo; + TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo(); + + ListObjectInspector actualListOI = (ListObjectInspector) actualObjectInspector; + ListObjectInspector expectedListOI = (ListObjectInspector) expectedObjectInspector; + int actualLength = actualListOI.getListLength(actual); + int expectedLength = expectedListOI.getListLength(expected); + if (actualLength != expectedLength) { + return false; + } + ObjectInspector actualElementOI = actualListOI.getListElementObjectInspector(); + ObjectInspector expectedElementOI = expectedListOI.getListElementObjectInspector(); + + for (int i = 0; i < actualLength; i++) { + Object actualElement = actualListOI.getListElement(actual, i); + Object expectedElement = expectedListOI.getListElement(expected, i); + if (!equals(actualElement, actualElementOI, expectedElement, expectedElementOI, elementTypeInfo)) { + return false; + } + } + return true; + } + case MAP: + { + MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; + TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo(); + + ArrayMapObjectInspector actualMapOI = (ArrayMapObjectInspector) actualObjectInspector; + MapObjectInspector expectedMapOI = (MapObjectInspector) 
expectedObjectInspector; + int actualLength = actualMapOI.getMapSize(actual); + int expectedLength = expectedMapOI.getMapSize(expected); + if (actualLength != expectedLength) { + return false; + } + ObjectInspector actualValueOI = actualMapOI.getMapValueObjectInspector(); + ObjectInspector expectedValueOI = expectedMapOI.getMapValueObjectInspector(); + + for (int i = 0; i < actualLength; i++) { + Object actualKey = actualMapOI.getKey(actual, i); + Object actualValue = actualMapOI.getValue(actual, i); + Object expectedValue = expectedMapOI.getMapValueElement(expected, actualKey); + if (!equals(actualValue, actualValueOI, expectedValue, expectedValueOI, valueTypeInfo) ) { + return false; + } + } + return true; + } + case STRUCT: + { + StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + + StructObjectInspector actualStructOI = (StructObjectInspector) actualObjectInspector; + StructObjectInspector expectedStructOI = (StructObjectInspector) expectedObjectInspector; + List actualStructFields = actualStructOI.getAllStructFieldRefs(); + List expectedStructFields = expectedStructOI.getAllStructFieldRefs(); + int actualLength = actualStructFields.size(); + int expectedLength = expectedStructFields.size(); + if (actualLength != expectedLength) { + return false; + } + for (int i = 0; i < actualLength; i++) { + StructField actualField = actualStructFields.get(i); + StructField expectedField = expectedStructFields.get(i); + Object actualFieldObject = actualStructOI.getStructFieldData(actual, actualField); + Object expectedFieldObject = expectedStructOI.getStructFieldData(expected, expectedField); + ObjectInspector actualFieldOI = actualField.getFieldObjectInspector(); + ObjectInspector expectedFieldOI = expectedField.getFieldObjectInspector(); + TypeInfo fieldTypeInfo = fieldTypeInfos.get(i); + if (!equals(actualFieldObject, actualFieldOI, expectedFieldObject, expectedFieldOI, fieldTypeInfo)) { + return false; + } + } + return true; + } + case UNION: + { + UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; + List fieldTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos(); + + UnionObjectInspector actualUnionOI = (UnionObjectInspector) actualObjectInspector; + UnionObjectInspector expectedUnionOI = (UnionObjectInspector) expectedObjectInspector; + + int actualTag = actualUnionOI.getTag(actual); + int expectedTag = expectedUnionOI.getTag(expected); + if (actualTag != expectedTag) { + return false; + } + ObjectInspector actualObjectOI = actualUnionOI.getObjectInspectors().get(actualTag); + ObjectInspector expectedObjectOI = expectedUnionOI.getObjectInspectors().get(expectedTag); + Object actualField = ((UnionObjectInspector) actualObjectInspector).getField(actual); + Object expectedField = ((UnionObjectInspector) expectedObjectInspector).getField(expected); + TypeInfo fieldTypeInfo = fieldTypeInfos.get(actualTag); + if (!equals(actualField, actualObjectOI, expectedField, expectedObjectOI, fieldTypeInfo)) { + return false; + } + } + return true; + } + } + return false; + } + void testVectorRowObject(int caseNum, boolean sort, Random r) throws HiveException { String[] emptyScratchTypeNames = new String[0]; VectorRandomRowSource source = new VectorRandomRowSource(); - source.init(r); + source.init(r, true, 4); VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx(); batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames); @@ -69,7 +206,7 @@ void testVectorRowObject(int caseNum, boolean sort, Random 
r) throws HiveExcepti VectorExtractRow vectorExtractRow = new VectorExtractRow(); vectorExtractRow.init(source.typeNames()); - Object[][] randomRows = source.randomRows(10000); + Object[][] randomRows = source.randomRows(1000); if (sort) { source.sort(randomRows); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java index cbde6158e9..e7367714e6 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java @@ -26,20 +26,29 @@ import java.util.Random; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.commons.lang.ArrayUtils; -import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.RandomTypeUtil; -import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; @@ -58,11 +67,19 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import 
org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hive.common.util.DateUtils; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.BytesWritable; +import com.google.common.base.Preconditions; import com.google.common.base.Charsets; /** @@ -76,6 +93,14 @@ private List typeNames; + private Category[] categories; + + private TypeInfo[] typeInfos; + + private List objectInspectorList; + + // Primitive. + private PrimitiveCategory[] primitiveCategories; private PrimitiveTypeInfo[] primitiveTypeInfos; @@ -106,30 +131,57 @@ public StructObjectInspector rowStructObjectInspector() { } public StructObjectInspector partialRowStructObjectInspector(int partialFieldCount) { - ArrayList partialPrimitiveObjectInspectorList = + ArrayList partialObjectInspectorList = new ArrayList(partialFieldCount); List columnNames = new ArrayList(partialFieldCount); for (int i = 0; i < partialFieldCount; i++) { columnNames.add(String.format("partial%d", i)); - partialPrimitiveObjectInspectorList.add( - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( - primitiveTypeInfos[i])); + partialObjectInspectorList.add(getObjectInspector(typeInfos[i])); } return ObjectInspectorFactory.getStandardStructObjectInspector( - columnNames, primitiveObjectInspectorList); + columnNames, objectInspectorList); } public void init(Random r) { + init(r, false, 0); + } + + public void init(Random r, boolean includeComplexTypes, int maxComplexDepth) { this.r = r; - chooseSchema(); + chooseSchema(includeComplexTypes, maxComplexDepth); + } + + public void init(int keyCount, Random random, List keyPrimitiveObjectInspectorList, + PrimitiveCategory[] keyPrimitiveCategories, PrimitiveTypeInfo[] keyPrimitiveTypeInfos) { + this.r = random; + columnCount = keyCount; + typeNames = new ArrayList(keyCount); + List columnNames = new ArrayList(keyCount); + categories = new Category[keyCount]; + + for (int i = 0; i < keyCount; i++) { + columnNames.add(String.format("col%d", i)); + typeNames.add(keyPrimitiveTypeInfos[i].getTypeName()); + categories[i] = Category.PRIMITIVE; + } + + typeInfos = keyPrimitiveTypeInfos; + objectInspectorList = keyPrimitiveObjectInspectorList; + primitiveCategories = keyPrimitiveCategories; + primitiveTypeInfos = keyPrimitiveTypeInfos; + primitiveObjectInspectorList = keyPrimitiveObjectInspectorList; + + rowStructObjectInspector = + ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, objectInspectorList); + alphabets = new String[keyCount]; } /* * For now, exclude CHAR until we determine why there is a difference (blank padding) * serializing with LazyBinarySerializeWrite and the regular SerDe... 
*/ - private static String[] possibleHiveTypeNames = { + private static String[] possibleHivePrimitiveTypeNames = { "boolean", "tinyint", "smallint", @@ -149,7 +201,146 @@ public void init(Random r) { "decimal" }; - private void chooseSchema() { + private static String[] possibleHiveComplexTypeNames = { + "array", + "map", + "struct", + "uniontype" + }; + + private String getRandomTypeName(boolean includeComplexTypes) { + String typeName; + if (!includeComplexTypes || r.nextInt(10) != 0) { + typeName = possibleHivePrimitiveTypeNames[r.nextInt(possibleHivePrimitiveTypeNames.length)]; + } else { + typeName = possibleHiveComplexTypeNames[r.nextInt(possibleHiveComplexTypeNames.length)]; + } + return typeName; + } + + private String getDecoratedTypeName(String typeName, boolean includeComplexTypes, int depth, int maxDepth) { + depth++; + boolean includeChildrenComplexTypes = includeComplexTypes && depth < maxDepth; + if (typeName.equals("char")) { + int maxLength = 1 + r.nextInt(100); + typeName = String.format("char(%d)", maxLength); + } else if (typeName.equals("varchar")) { + int maxLength = 1 + r.nextInt(100); + typeName = String.format("varchar(%d)", maxLength); + } else if (typeName.equals("decimal")) { + typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE); + } else if (typeName.equals("array")) { + String elementTypeName = getRandomTypeName(includeChildrenComplexTypes); + elementTypeName = getDecoratedTypeName(elementTypeName, includeChildrenComplexTypes, depth, maxDepth); + typeName = String.format("array<%s>", elementTypeName); + } else if (typeName.equals("map")) { + String keyTypeName = getRandomTypeName(includeChildrenComplexTypes); + keyTypeName = getDecoratedTypeName(keyTypeName, includeChildrenComplexTypes, depth, maxDepth); + String valueTypeName = getRandomTypeName(includeChildrenComplexTypes); + valueTypeName = getDecoratedTypeName(valueTypeName, includeChildrenComplexTypes, depth, maxDepth); + typeName = String.format("map<%s,%s>", keyTypeName, valueTypeName); + } else if (typeName.equals("struct")) { + final int fieldCount = 1 + r.nextInt(10); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < fieldCount; i++) { + String fieldTypeName = getRandomTypeName(includeChildrenComplexTypes); + fieldTypeName = getDecoratedTypeName(fieldTypeName, includeChildrenComplexTypes, depth, maxDepth); + if (i > 0) { + sb.append(","); + } + sb.append("col"); + sb.append(i); + sb.append(":"); + sb.append(fieldTypeName); + } + typeName = String.format("struct<%s>", sb.toString()); + } else if (typeName.equals("uniontype")) { + final int fieldCount = 1 + r.nextInt(10); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < fieldCount; i++) { + String fieldTypeName = getRandomTypeName(includeChildrenComplexTypes); + fieldTypeName = getDecoratedTypeName(fieldTypeName, includeChildrenComplexTypes, depth, maxDepth); + if (i > 0) { + sb.append(","); + } + sb.append(fieldTypeName); + } + typeName = String.format("uniontype<%s>", sb.toString()); + } + return typeName; + } + + private ObjectInspector getObjectInspector(TypeInfo typeInfo) { + ObjectInspector objectInspector; + switch (typeInfo.getCategory()) { + case PRIMITIVE: + { + PrimitiveTypeInfo primitiveType = (PrimitiveTypeInfo) typeInfo; + objectInspector = + PrimitiveObjectInspectorFactory.
+ getPrimitiveWritableObjectInspector(primitiveType); + } + break; + case MAP: + { + MapTypeInfo mapType = (MapTypeInfo) typeInfo; + MapObjectInspector mapInspector = + ObjectInspectorFactory.getStandardMapObjectInspector( + getObjectInspector(mapType.getMapKeyTypeInfo()), + getObjectInspector(mapType.getMapValueTypeInfo())); + objectInspector = mapInspector; + } + break; + case LIST: + { + ListTypeInfo listType = (ListTypeInfo) typeInfo; + ListObjectInspector listInspector = + ObjectInspectorFactory.getStandardListObjectInspector( + getObjectInspector(listType.getListElementTypeInfo())); + objectInspector = listInspector; + } + break; + case STRUCT: + { + StructTypeInfo structType = (StructTypeInfo) typeInfo; + List fieldTypes = structType.getAllStructFieldTypeInfos(); + + List fieldInspectors = new ArrayList(); + for (TypeInfo fieldType : fieldTypes) { + fieldInspectors.add(getObjectInspector(fieldType)); + } + + StructObjectInspector structInspector = + ObjectInspectorFactory.getStandardStructObjectInspector( + structType.getAllStructFieldNames(), fieldInspectors); + objectInspector = structInspector; + } + break; + case UNION: + { + UnionTypeInfo unionType = (UnionTypeInfo) typeInfo; + List fieldTypes = unionType.getAllUnionObjectTypeInfos(); + + List fieldInspectors = new ArrayList(); + for (TypeInfo fieldType : fieldTypes) { + fieldInspectors.add(getObjectInspector(fieldType)); + } + + UnionObjectInspector unionInspector = + ObjectInspectorFactory.getStandardUnionObjectInspector( + fieldInspectors); + objectInspector = unionInspector; + } + break; + default: + throw new RuntimeException("Unexpected category " + typeInfo.getCategory()); + } + Preconditions.checkState(objectInspector != null); + return objectInspector; + } + + private void chooseSchema(boolean includeComplexTypes, int maxComplexDepth) { HashSet hashSet = null; boolean allTypes; boolean onlyOne = (r.nextInt(100) == 7); @@ -160,13 +351,20 @@ private void chooseSchema() { allTypes = r.nextBoolean(); if (allTypes) { // One of each type. 
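+ // When complex types are included, the complex type names are counted into columnCount as well.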
- columnCount = possibleHiveTypeNames.length; + columnCount = possibleHivePrimitiveTypeNames.length; + if (includeComplexTypes) { + columnCount += possibleHiveComplexTypeNames.length; + } hashSet = new HashSet(); } else { columnCount = 1 + r.nextInt(20); } } typeNames = new ArrayList(columnCount); + categories = new Category[columnCount]; + typeInfos = new TypeInfo[columnCount]; + objectInspectorList = new ArrayList(columnCount); + primitiveCategories = new PrimitiveCategory[columnCount]; primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; primitiveObjectInspectorList = new ArrayList(columnCount); @@ -176,12 +374,18 @@ private void chooseSchema() { String typeName; if (onlyOne) { - typeName = possibleHiveTypeNames[r.nextInt(possibleHiveTypeNames.length)]; + typeName = getRandomTypeName(includeComplexTypes); } else { int typeNum; if (allTypes) { + int maxTypeNum = possibleHivePrimitiveTypeNames.length; + if (includeComplexTypes) { + maxTypeNum += possibleHiveComplexTypeNames.length; + } while (true) { - typeNum = r.nextInt(possibleHiveTypeNames.length); + + typeNum = r.nextInt(maxTypeNum); + Integer typeNumInteger = new Integer(typeNum); if (!hashSet.contains(typeNumInteger)) { hashSet.add(typeNumInteger); @@ -189,30 +393,94 @@ private void chooseSchema() { } } } else { - typeNum = r.nextInt(possibleHiveTypeNames.length); + if (!includeComplexTypes || r.nextInt(10) != 0) { + typeNum = r.nextInt(possibleHivePrimitiveTypeNames.length); + } else { + typeNum = possibleHivePrimitiveTypeNames.length + r.nextInt(possibleHiveComplexTypeNames.length); + } + } + if (typeNum < possibleHivePrimitiveTypeNames.length) { + typeName = possibleHivePrimitiveTypeNames[typeNum]; + } else { + typeName = possibleHiveComplexTypeNames[typeNum - possibleHivePrimitiveTypeNames.length]; } - typeName = possibleHiveTypeNames[typeNum]; + + } + + String decoratedTypeName = getDecoratedTypeName(typeName, includeComplexTypes, 0, maxComplexDepth); + + TypeInfo typeInfo; + try { + typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(decoratedTypeName); + } catch (Exception e) { + throw new RuntimeException("Cannot convert type name " + decoratedTypeName + " to a type " + e); } - if (typeName.equals("char")) { - int maxLength = 1 + r.nextInt(100); - typeName = String.format("char(%d)", maxLength); - } else if (typeName.equals("varchar")) { - int maxLength = 1 + r.nextInt(100); - typeName = String.format("varchar(%d)", maxLength); - } else if (typeName.equals("decimal")) { - typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE); + + typeInfos[c] = typeInfo; + Category category = typeInfo.getCategory(); + categories[c] = category; + ObjectInspector objectInspector = getObjectInspector(typeInfo); + switch (category) { + case PRIMITIVE: + { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + objectInspector = + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo); + primitiveTypeInfos[c] = primitiveTypeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + primitiveCategories[c] = primitiveCategory; + primitiveObjectInspectorList.add(objectInspector); + } + break; + case LIST: + case MAP: + case STRUCT: + case UNION: + primitiveObjectInspectorList.add(null); + break; + default: + throw new RuntimeException("Unexpected category " + category); + } + objectInspectorList.add(objectInspector); + + if (category == Category.PRIMITIVE) { } - PrimitiveTypeInfo primitiveTypeInfo =
(PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); - primitiveTypeInfos[c] = primitiveTypeInfo; - PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); - primitiveCategories[c] = primitiveCategory; - primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo)); - typeNames.add(typeName); + typeNames.add(decoratedTypeName); } - rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList); + rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector( + columnNames, objectInspectorList); alphabets = new String[columnCount]; } + public Object[][] randomRows(int n) { + Object[][] result = new Object[n][]; + for (int i = 0; i < n; i++) { + result[i] = randomRow(); + } + return result; + } + + public Object[] randomRow() { + Object row[] = new Object[columnCount]; + for (int c = 0; c < columnCount; c++) { + row[c] = randomWritable(c); + } + return row; + } + + public Object[] randomPrimitiveRow(int columnCount) { + return randomPrimitiveRow(columnCount, r, primitiveTypeInfos); + } + + public static Object[] randomPrimitiveRow(int columnCount, Random r, + PrimitiveTypeInfo[] primitiveTypeInfos) { + Object row[] = new Object[columnCount]; + for (int c = 0; c < columnCount; c++) { + row[c] = randomPrimitiveObject(r, primitiveTypeInfos[c]); + } + return row; + } + public void addBinarySortableAlphabets() { for (int c = 0; c < columnCount; c++) { switch (primitiveCategories[c]) { @@ -241,52 +509,6 @@ public void addEscapables(String needsEscapeStr) { this.needsEscapeStr = needsEscapeStr; } - public Object[][] randomRows(int n) { - Object[][] result = new Object[n][]; - for (int i = 0; i < n; i++) { - result[i] = randomRow(); - } - return result; - } - - public Object[] randomRow() { - Object row[] = new Object[columnCount]; - for (int c = 0; c < columnCount; c++) { - Object object = randomObject(c); - if (object == null) { - throw new Error("Unexpected null for column " + c); - } - row[c] = getWritableObject(c, object); - if (row[c] == null) { - throw new Error("Unexpected null for writable for column " + c); - } - } - return row; - } - - public Object[] randomRow(int columnCount) { - return randomRow(columnCount, r, primitiveObjectInspectorList, primitiveCategories, - primitiveTypeInfos); - } - - public static Object[] randomRow(int columnCount, Random r, - List primitiveObjectInspectorList, PrimitiveCategory[] primitiveCategories, - PrimitiveTypeInfo[] primitiveTypeInfos) { - Object row[] = new Object[columnCount]; - for (int c = 0; c < columnCount; c++) { - Object object = randomObject(c, r, primitiveCategories, primitiveTypeInfos); - if (object == null) { - throw new Error("Unexpected null for column " + c); - } - row[c] = getWritableObject(c, object, primitiveObjectInspectorList, - primitiveCategories, primitiveTypeInfos); - if (row[c] == null) { - throw new Error("Unexpected null for writable for column " + c); - } - } - return row; - } - public static void sort(Object[][] rows, ObjectInspector oi) { for (int i = 0; i < rows.length; i++) { for (int j = i + 1; j < rows.length; j++) { @@ -303,18 +525,9 @@ public void sort(Object[][] rows) { VectorRandomRowSource.sort(rows, rowStructObjectInspector); } - public Object getWritableObject(int column, Object object) { - return getWritableObject(column, object, primitiveObjectInspectorList, - primitiveCategories, primitiveTypeInfos); - } - - public static Object 
getWritableObject(int column, Object object, - List primitiveObjectInspectorList, PrimitiveCategory[] primitiveCategories, - PrimitiveTypeInfo[] primitiveTypeInfos) { - ObjectInspector objectInspector = primitiveObjectInspectorList.get(column); - PrimitiveCategory primitiveCategory = primitiveCategories[column]; - PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; - switch (primitiveCategory) { + public Object getWritablePrimitiveObject(PrimitiveTypeInfo primitiveTypeInfo, + ObjectInspector objectInspector, Object object) { + switch (primitiveTypeInfo.getPrimitiveCategory()) { case BOOLEAN: return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object); case BYTE: @@ -361,99 +574,203 @@ public static Object getWritableObject(int column, Object object, return result; } default: - throw new Error("Unknown primitive category " + primitiveCategory); + throw new Error("Unknown primitive category " + primitiveTypeInfo.getPrimitiveCategory()); } } - public Object randomObject(int column) { - return randomObject(column, r, primitiveCategories, primitiveTypeInfos, alphabets, addEscapables, needsEscapeStr); + public Object randomWritable(int column) { + return randomWritable(typeInfos[column], objectInspectorList.get(column)); } - public static Object randomObject(int column, Random r, PrimitiveCategory[] primitiveCategories, - PrimitiveTypeInfo[] primitiveTypeInfos) { - return randomObject(column, r, primitiveCategories, primitiveTypeInfos, null, false, ""); - } - - public static Object randomObject(int column, Random r, PrimitiveCategory[] primitiveCategories, - PrimitiveTypeInfo[] primitiveTypeInfos, String[] alphabets, boolean addEscapables, String needsEscapeStr) { - PrimitiveCategory primitiveCategory = primitiveCategories[column]; - PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; - try { - switch (primitiveCategory) { - case BOOLEAN: - return Boolean.valueOf(r.nextInt(1) == 1); - case BYTE: - return Byte.valueOf((byte) r.nextInt()); - case SHORT: - return Short.valueOf((short) r.nextInt()); - case INT: - return Integer.valueOf(r.nextInt()); - case LONG: - return Long.valueOf(r.nextLong()); - case DATE: - return RandomTypeUtil.getRandDate(r); - case FLOAT: - return Float.valueOf(r.nextFloat() * 10 - 5); - case DOUBLE: - return Double.valueOf(r.nextDouble() * 10 - 5); - case STRING: - case CHAR: - case VARCHAR: - { - String result; - if (alphabets != null && alphabets[column] != null) { - result = RandomTypeUtil.getRandString(r, alphabets[column], r.nextInt(10)); - } else { - result = RandomTypeUtil.getRandString(r); + public Object randomWritable(TypeInfo typeInfo, ObjectInspector objectInspector) { + switch (typeInfo.getCategory()) { + case PRIMITIVE: + { + Object object = randomPrimitiveObject(r, (PrimitiveTypeInfo) typeInfo); + return getWritablePrimitiveObject((PrimitiveTypeInfo) typeInfo, objectInspector, object); + } + case LIST: + { + if (r.nextInt(20) == 0) { + return null; + } + // Always generate a list with at least 1 value? 
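+ // Note: below, a one-element list whose element is null, or whose only string/binary/char/varchar element is empty, is returned as a null list.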
+ final int elementCount = 1 + r.nextInt(100); + StandardListObjectInspector listObjectInspector = + (StandardListObjectInspector) objectInspector; + ObjectInspector elementObjectInspector = + listObjectInspector.getListElementObjectInspector(); + TypeInfo elementTypeInfo = + TypeInfoUtils.getTypeInfoFromObjectInspector( + elementObjectInspector); + boolean isStringFamily = false; + PrimitiveCategory primitiveCategory = null; + if (elementTypeInfo.getCategory() == Category.PRIMITIVE) { + primitiveCategory = ((PrimitiveTypeInfo) elementTypeInfo).getPrimitiveCategory(); + if (primitiveCategory == PrimitiveCategory.STRING || + primitiveCategory == PrimitiveCategory.BINARY || + primitiveCategory == PrimitiveCategory.CHAR || + primitiveCategory == PrimitiveCategory.VARCHAR) { + isStringFamily = true; } - if (addEscapables && result.length() > 0) { - int escapeCount = 1 + r.nextInt(2); - for (int i = 0; i < escapeCount; i++) { - int index = r.nextInt(result.length()); - String begin = result.substring(0, index); - String end = result.substring(index); - Character needsEscapeChar = needsEscapeStr.charAt(r.nextInt(needsEscapeStr.length())); - result = begin + needsEscapeChar + end; - } + } + Object listObj = listObjectInspector.create(elementCount); + for (int i = 0; i < elementCount; i++) { + Object ele = randomWritable(elementTypeInfo, elementObjectInspector); + // UNDONE: For now, a 1-element list with a null element is a null list... + if (ele == null && elementCount == 1) { + return null; } - switch (primitiveCategory) { - case STRING: - return result; - case CHAR: - return new HiveChar(result, ((CharTypeInfo) primitiveTypeInfo).getLength()); - case VARCHAR: - return new HiveVarchar(result, ((VarcharTypeInfo) primitiveTypeInfo).getLength()); - default: - throw new Error("Unknown primitive category " + primitiveCategory); + if (isStringFamily && elementCount == 1) { + switch (primitiveCategory) { + case STRING: + if (((Text) ele).getLength() == 0) { + return null; + } + break; + case BINARY: + if (((BytesWritable) ele).getLength() == 0) { + return null; + } + break; + case CHAR: + if (((HiveCharWritable) ele).getHiveChar().getStrippedValue().isEmpty()) { + return null; + } + break; + case VARCHAR: + if (((HiveVarcharWritable) ele).getHiveVarchar().getValue().isEmpty()) { + return null; + } + break; + default: + throw new RuntimeException("Unexpected primitive category " + primitiveCategory); + } } + listObjectInspector.set(listObj, i, ele); } - case BINARY: - return getRandBinary(r, 1 + r.nextInt(100)); - case TIMESTAMP: - return RandomTypeUtil.getRandTimestamp(r); - case INTERVAL_YEAR_MONTH: - return getRandIntervalYearMonth(r); - case INTERVAL_DAY_TIME: - return getRandIntervalDayTime(r); - case DECIMAL: - return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo); - default: - throw new Error("Unknown primitive category " + primitiveCategory); + return listObj; + } + case MAP: + { + if (r.nextInt(20) == 0) { + return null; + } + final int keyPairCount = r.nextInt(100); + StandardMapObjectInspector mapObjectInspector = + (StandardMapObjectInspector) objectInspector; + ObjectInspector keyObjectInspector = + mapObjectInspector.getMapKeyObjectInspector(); + TypeInfo keyTypeInfo = + TypeInfoUtils.getTypeInfoFromObjectInspector( + keyObjectInspector); + ObjectInspector valueObjectInspector = + mapObjectInspector.getMapValueObjectInspector(); + TypeInfo valueTypeInfo = + TypeInfoUtils.getTypeInfoFromObjectInspector( + valueObjectInspector); + Object mapObj = mapObjectInspector.create(); + 
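+ // Fill the map with random key/value pairs; a duplicate random key overwrites the earlier entry, so the map may end up smaller than keyPairCount.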
for (int i = 0; i < keyPairCount; i++) { + Object key = randomWritable(keyTypeInfo, keyObjectInspector); + Object value = randomWritable(valueTypeInfo, valueObjectInspector); + mapObjectInspector.put(mapObj, key, value); + } + return mapObj; + } + case STRUCT: + { + if (r.nextInt(20) == 0) { + return null; + } + StandardStructObjectInspector structObjectInspector = + (StandardStructObjectInspector) objectInspector; + List fieldRefsList = structObjectInspector.getAllStructFieldRefs(); + final int fieldCount = fieldRefsList.size(); + Object structObj = structObjectInspector.create(); + for (int i = 0; i < fieldCount; i++) { + StructField fieldRef = fieldRefsList.get(i); + ObjectInspector fieldObjectInspector = + fieldRef.getFieldObjectInspector(); + TypeInfo fieldTypeInfo = + TypeInfoUtils.getTypeInfoFromObjectInspector( + fieldObjectInspector); + Object fieldObj = randomWritable(fieldTypeInfo, fieldObjectInspector); + structObjectInspector.setStructFieldData(structObj, fieldRef, fieldObj); + } + return structObj; + } + case UNION: + { + StandardUnionObjectInspector unionObjectInspector = + (StandardUnionObjectInspector) objectInspector; + List objectInspectorList = unionObjectInspector.getObjectInspectors(); + final int unionCount = objectInspectorList.size(); + final byte tag = (byte) r.nextInt(unionCount); + Object unionObj = unionObjectInspector.create(); + ObjectInspector fieldObjectInspector = + objectInspectorList.get(tag); + TypeInfo fieldTypeInfo = + TypeInfoUtils.getTypeInfoFromObjectInspector( + fieldObjectInspector); + Object fieldObj = randomWritable(fieldTypeInfo, fieldObjectInspector); + return new StandardUnion(tag, fieldObj); } - } catch (Exception e) { - throw new RuntimeException("randomObject failed on column " + column + " type " + primitiveCategory, e); + default: + throw new RuntimeException("Unexpected category " + typeInfo.getCategory()); } } - public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo, String alphabet) { - int maxLength = 1 + r.nextInt(charTypeInfo.getLength()); - String randomString = RandomTypeUtil.getRandString(r, alphabet, 100); - HiveChar hiveChar = new HiveChar(randomString, maxLength); - return hiveChar; + public Object randomPrimitiveObject(int column) { + return randomPrimitiveObject(r, primitiveTypeInfos[column]); + } + + public static Object randomPrimitiveObject(Random r, PrimitiveTypeInfo primitiveTypeInfo) { + switch (primitiveTypeInfo.getPrimitiveCategory()) { + case BOOLEAN: + return Boolean.valueOf(r.nextBoolean()); + case BYTE: + return Byte.valueOf((byte) r.nextInt()); + case SHORT: + return Short.valueOf((short) r.nextInt()); + case INT: + return Integer.valueOf(r.nextInt()); + case LONG: + return Long.valueOf(r.nextLong()); + case DATE: + return RandomTypeUtil.getRandDate(r); + case FLOAT: + return Float.valueOf(r.nextFloat() * 10 - 5); + case DOUBLE: + return Double.valueOf(r.nextDouble() * 10 - 5); + case STRING: + return RandomTypeUtil.getRandString(r); + case CHAR: + return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo); + case VARCHAR: + return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo); + case BINARY: + return getRandBinary(r, 1 + r.nextInt(100)); + case TIMESTAMP: + return RandomTypeUtil.getRandTimestamp(r); + case INTERVAL_YEAR_MONTH: + return getRandIntervalYearMonth(r); + case INTERVAL_DAY_TIME: + return getRandIntervalDayTime(r); + case DECIMAL: + { + HiveDecimal dec = getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo); + return dec; + } + default: + throw new 
Error("Unknown primitive category " + primitiveTypeInfo.getCategory()); + } } public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) { - return getRandHiveChar(r, charTypeInfo, "abcdefghijklmnopqrstuvwxyz"); + int maxLength = 1 + r.nextInt(charTypeInfo.getLength()); + String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveChar hiveChar = new HiveChar(randomString, maxLength); + return hiveChar; } public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo, String alphabet) { @@ -532,4 +849,12 @@ public static HiveIntervalDayTime getRandIntervalDayTime(Random r) { HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr); return intervalDayTimeVal; } + + public static Object[] randomRow( + int keyCount, Random random, ArrayList keyPrimitiveObjectInspectorList, + PrimitiveCategory[] keyPrimitiveCategories, PrimitiveTypeInfo[] keyPrimitiveTypeInfos) { + VectorRandomRowSource source = new VectorRandomRowSource(); + source.init(keyCount, random, keyPrimitiveObjectInspectorList, keyPrimitiveCategories, keyPrimitiveTypeInfos); + return source.randomRow(); + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java index bbccc7fe90..64bcb11c62 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java +++ serde/src/java/org/apache/hadoop/hive/serde2/io/TimestampWritable.java @@ -133,7 +133,8 @@ public void set(Timestamp t) { timestamp.setNanos(0); return; } - this.timestamp = t; + timestamp.setTime(t.getTime()); + timestamp.setNanos(t.getNanos()); bytesEmpty = true; timestampEmpty = false; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ArrayListObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ArrayListObjectInspector.java new file mode 100644 index 0000000000..5ab8bcdba9 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ArrayListObjectInspector.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector; + +import java.util.Arrays; +import java.util.List; + +/** + * A simple list object inspector based on an object array. 
+ */ +public class ArrayListObjectInspector implements SettableListObjectInspector { + + private ObjectInspector listElementObjectInspector; + + protected ArrayListObjectInspector() { + super(); + } + + protected ArrayListObjectInspector(ObjectInspector listElementObjectInspector) { + this.listElementObjectInspector = listElementObjectInspector; + } + + @Override + public ObjectInspector getListElementObjectInspector() { + return listElementObjectInspector; + } + + @Override + public Object getListElement(Object data, int index) { + return ((Object[]) data)[index]; + } + + @Override + public int getListLength(Object data) { + return ((Object[]) data).length; + } + + @Override + public List getList(Object data) { + return Arrays.asList((Object[]) data); + } + + @Override + public Object create(int maxSize) { + return new Object[maxSize]; + } + + @Override + public Object set(Object list, int index, Object element) { + ((Object[]) list)[index] = element; + return list; + } + + @Override + public Object resize(Object list, int newSize) { + throw new UnsupportedOperationException(); + } + + @Override + public String getTypeName() { + return org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME + "<" + + listElementObjectInspector.getTypeName() + ">"; + } + + @Override + public final Category getCategory() { + return Category.LIST; + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ArrayMapObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ArrayMapObjectInspector.java new file mode 100644 index 0000000000..5789448533 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ArrayMapObjectInspector.java @@ -0,0 +1,139 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector; + +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +/** + * A simple map object inspector based on an object array. 
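+ * The map is stored as an Object[] of length 2 * size, with each key at an even index and its value at the following odd index.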
+ */ +public class ArrayMapObjectInspector implements SettableMapObjectInspector { + + private ObjectInspector mapKeyObjectInspector; + private ObjectInspector mapValueObjectInspector; + + protected ArrayMapObjectInspector() { + super(); + } + + protected ArrayMapObjectInspector( + ObjectInspector mapKeyObjectInspector, + ObjectInspector mapValueObjectInspector) { + this.mapKeyObjectInspector = mapKeyObjectInspector; + this.mapValueObjectInspector = mapValueObjectInspector; + } + + @Override + public Object create() { + throw new UnsupportedOperationException(); + } + + @Override + public Object put(Object map, Object key, Object value) { + Object[] array = (Object[]) map; + int i = 0; + while (array[i] != null) { + i++; + } + array[i] = key; + array[i + 1] = value; + return array; + } + + @Override + public Object remove(Object map, Object key) { + throw new UnsupportedOperationException(); + } + + @Override + public Object clear(Object map) { + Arrays.fill((Object[]) map, null); + return map; + } + + @Override + public ObjectInspector getMapKeyObjectInspector() { + return mapKeyObjectInspector; + } + + @Override + public ObjectInspector getMapValueObjectInspector() { + return mapValueObjectInspector; + } + + @Override + public Object getMapValueElement(Object data, Object key) { + Object[] array = (Object[]) data; + int size = array.length / 2; + for (int i = 0; i < size; i++) { + if (array[i * 2].equals(key)) { + return array[i * 2 + 1]; + } + } + return null; + } + + @Override + public Map getMap(Object data) { + Object[] array = (Object[]) data; + int size = array.length / 2; + Map map = new HashMap(size); + for (int i = 0; i < size; i++) { + map.put(array[i * 2], array[i * 2 + 1]); + } + return map; + } + + @Override + public int getMapSize(Object data) { + return ((Object[]) data).length / 2; + } + + @Override + public String getTypeName() { + return org.apache.hadoop.hive.serde.serdeConstants.MAP_TYPE_NAME + "<" + + mapKeyObjectInspector.getTypeName() + "," + + mapValueObjectInspector.getTypeName() + ">"; + } + + @Override + public final Category getCategory() { + return Category.MAP; + } + + public Object getKey(Object data, int index) { + return ((Object[]) data)[index * 2]; + } + + public Object getValue(Object data, int index) { + return ((Object[]) data)[index * 2 + 1]; + } + + public Object create(int length) { + return new Object[length * 2]; + } + + public Object put(Object map, int i, Object key, Object value) { + Object[] array = ((Object[]) map); + array[i * 2] = key; + array[i * 2 + 1] = value; + return map; + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ArrayStructObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ArrayStructObjectInspector.java new file mode 100644 index 0000000000..c14b2595e5 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ArrayStructObjectInspector.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * A simple struct object inspector based on an object array. + */ +public class ArrayStructObjectInspector extends SettableStructObjectInspector { + + private List fields; + private int size; + + protected ArrayStructObjectInspector() { + super(); + } + + protected ArrayStructObjectInspector( + List structFieldNames, + List structFieldObjectInspectors, + List structComments) { + + size = Math.min(structFieldNames.size(), structFieldObjectInspectors.size()); + fields = new ArrayList(size); + if (structComments == null) { + for (int i = 0; i < size; i++) { + this.fields.add(new StandardStructObjectInspector.MyField( + i, structFieldNames.get(i), structFieldObjectInspectors.get(i))); + } + } else { + for (int i = 0; i < size; i++) { + this.fields.add(new StandardStructObjectInspector.MyField( + i, structFieldNames.get(i), structFieldObjectInspectors.get(i), structComments.get(i))); + } + } + } + + @Override + public Object create() { + return new Object[size]; + } + + @Override + public Object setStructFieldData(Object struct, StructField field, Object fieldValue) { + Object[] array = (Object[]) struct; + array[field.getFieldID()] = fieldValue; + return array; + } + + @Override + public List getAllStructFieldRefs() { + return fields; + } + + @Override + public StructField getStructFieldRef(String fieldName) { + for (StructField field : fields) { + if (field.getFieldName().equals(fieldName)) { + return field; + } + } + return null; + } + + @Override + public Object getStructFieldData(Object data, StructField fieldRef) { + Object[] array = (Object[]) data; + return array[fieldRef.getFieldID()]; + } + + @Override + public List getStructFieldsDataAsList(Object data) { + return Arrays.asList((Object[]) data); + } + + @Override + public String getTypeName() { + return ObjectInspectorUtils.getStandardStructTypeName(this); + } + + @Override + public Category getCategory() { + return Category.STRUCT; + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ArrayUnionObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ArrayUnionObjectInspector.java new file mode 100644 index 0000000000..9886523aa2 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ArrayUnionObjectInspector.java @@ -0,0 +1,78 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector; + +import java.util.List; + +/** + * A simple union object inspector based on an object array. + */ +public class ArrayUnionObjectInspector extends SettableUnionObjectInspector { + private List unionObjectInspectors; + + protected ArrayUnionObjectInspector() { + super(); + } + + protected ArrayUnionObjectInspector(List unionObjectInspectors) { + this.unionObjectInspectors = unionObjectInspectors; + } + + @Override + public Object create() { + return new Object[2]; + } + + @Override + public Object addField(Object union, Object field) { + throw new UnsupportedOperationException(); + } + + @Override + public Object set(Object union, byte tag, Object field) { + Object[] array = (Object[]) union; + array[0] = tag; + array[1] = field; + return array; + } + + @Override + public List getObjectInspectors() { + return unionObjectInspectors; + } + + @Override + public byte getTag(Object o) { + return ((Number) ((Object[]) o)[0]).byteValue(); + } + + @Override + public Object getField(Object o) { + return ((Object[]) o)[1]; + } + + @Override + public String getTypeName() { + return ObjectInspectorUtils.getStandardUnionTypeName(this); + } + + @Override + public Category getCategory() { + return Category.UNION; + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java index cb63d59e18..ff224e3cdb 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java @@ -392,6 +392,98 @@ public static ColumnarStructObjectInspector getColumnarStructObjectInspector( return result; } + static ConcurrentHashMap + cachedArrayListObjectInspector = new ConcurrentHashMap(); + + public static ArrayListObjectInspector getArrayListObjectInspector(ObjectInspector listElementObjectInspector) { + ArrayListObjectInspector result = cachedArrayListObjectInspector + .get(listElementObjectInspector); + if (result == null) { + result = new ArrayListObjectInspector(listElementObjectInspector); + ArrayListObjectInspector prev = + cachedArrayListObjectInspector.putIfAbsent(listElementObjectInspector, result); + if (prev != null) { + result = prev; + } + } + return result; + } + + static ConcurrentHashMap, ArrayMapObjectInspector> + cachedArrayMapObjectInspector = new ConcurrentHashMap, ArrayMapObjectInspector>(); + + public static ArrayMapObjectInspector getArrayMapObjectInspector( + ObjectInspector mapKeyObjectInspector, + ObjectInspector mapValueObjectInspector) { + ArrayList signature = new ArrayList(2); + signature.add(mapKeyObjectInspector); + signature.add(mapValueObjectInspector); + ArrayMapObjectInspector result = cachedArrayMapObjectInspector + .get(signature); + if (result == null) { + result = new ArrayMapObjectInspector(mapKeyObjectInspector, + mapValueObjectInspector); + ArrayMapObjectInspector prev = + cachedArrayMapObjectInspector.putIfAbsent(signature, result); + if (prev != null) { 
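+ // Another thread cached an equivalent inspector first; reuse the cached instance.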
+ result = prev; + } + } + return result; + } + + static ConcurrentHashMap, ArrayUnionObjectInspector> + cachedArrayUnionObjectInspector = + new ConcurrentHashMap, ArrayUnionObjectInspector>(); + + public static ArrayUnionObjectInspector getArrayUnionObjectInspector( + List unionObjectInspectors) { + ArrayUnionObjectInspector result = cachedArrayUnionObjectInspector + .get(unionObjectInspectors); + if (result == null) { + result = new ArrayUnionObjectInspector(unionObjectInspectors); + ArrayUnionObjectInspector prev = + cachedArrayUnionObjectInspector.putIfAbsent(unionObjectInspectors, result); + if (prev != null) { + result = prev; + } + } + return result; + } + + static ConcurrentHashMap>, ArrayStructObjectInspector> cachedArrayStructObjectInspector = + new ConcurrentHashMap>, ArrayStructObjectInspector>(); + + public static ArrayStructObjectInspector getArrayStructObjectInspector( + List structFieldNames, + List structFieldObjectInspectors) { + return getArrayStructObjectInspector(structFieldNames, structFieldObjectInspectors, null); + } + + public static ArrayStructObjectInspector getArrayStructObjectInspector( + List structFieldNames, + List structFieldObjectInspectors, + List structComments) { + ArrayList> signature = new ArrayList>(3); + StringInternUtils.internStringsInList(structFieldNames); + signature.add(structFieldNames); + signature.add(structFieldObjectInspectors); + if (structComments != null) { + StringInternUtils.internStringsInList(structComments); + signature.add(structComments); + } + ArrayStructObjectInspector result = cachedArrayStructObjectInspector.get(signature); + if (result == null) { + result = new ArrayStructObjectInspector(structFieldNames, structFieldObjectInspectors, structComments); + ArrayStructObjectInspector prev = + cachedArrayStructObjectInspector.putIfAbsent(signature, result); + if (prev != null) { + result = prev; + } + } + return result; + } + private ObjectInspectorFactory() { // prevent instantiation } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/SettableUnionObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/SettableUnionObjectInspector.java index 564d8d6045..4b526b9099 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/SettableUnionObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/SettableUnionObjectInspector.java @@ -31,4 +31,6 @@ /* Add field to the object */ public abstract Object addField(Object union, Object field); + + public abstract Object set(Object union, byte tag, Object field); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardUnionObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardUnionObjectInspector.java index f26c9ec69b..61a9027472 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardUnionObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardUnionObjectInspector.java @@ -79,6 +79,31 @@ public byte getTag() { public String toString() { return tag + ":" + object; } + + @Override + public int hashCode() { + return object.hashCode() ^ tag; + } + + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof StandardUnion)) { + return false; + } + StandardUnion that = (StandardUnion) obj; + if (this.tag != that.tag) { + return false; + } + + if (this.object == null) { + return that.object == null; + } else { + return 
this.object.equals(that.object); + } + } } /** @@ -130,4 +155,11 @@ public Object addField(Object union, Object field) { return a; } + @Override + public Object set(Object union, byte tag, Object field) { + StandardUnion standardUnion = ((StandardUnion) union); + standardUnion.tag = tag; + standardUnion.object = field; + return union; + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java index 54964e407d..cda21da577 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java @@ -637,6 +637,82 @@ public static ObjectInspector getStandardWritableObjectInspectorFromTypeInfo( return result; } + static ConcurrentHashMap cachedArrayObjectInspector = + new ConcurrentHashMap(); + + public static ObjectInspector getArrayWritableObjectInspectorFromTypeInfo( + TypeInfo typeInfo) { + ObjectInspector result = cachedArrayObjectInspector.get(typeInfo); + if (result == null) { + switch (typeInfo.getCategory()) { + case PRIMITIVE: { + result = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + (PrimitiveTypeInfo) typeInfo); + break; + } + case LIST: { + ObjectInspector elementObjectInspector = + getArrayWritableObjectInspectorFromTypeInfo(((ListTypeInfo) typeInfo) + .getListElementTypeInfo()); + result = ObjectInspectorFactory + .getArrayListObjectInspector(elementObjectInspector); + break; + } + case MAP: { + MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; + ObjectInspector keyObjectInspector = + getArrayWritableObjectInspectorFromTypeInfo(mapTypeInfo.getMapKeyTypeInfo()); + ObjectInspector valueObjectInspector = + getArrayWritableObjectInspectorFromTypeInfo(mapTypeInfo.getMapValueTypeInfo()); + result = ObjectInspectorFactory.getArrayMapObjectInspector( + keyObjectInspector, valueObjectInspector); + break; + } + case STRUCT: { + StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; + List fieldNames = structTypeInfo.getAllStructFieldNames(); + List fieldTypeInfos = structTypeInfo + .getAllStructFieldTypeInfos(); + List fieldObjectInspectors = new ArrayList( + fieldTypeInfos.size()); + for (int i = 0; i < fieldTypeInfos.size(); i++) { + fieldObjectInspectors + .add(getArrayWritableObjectInspectorFromTypeInfo(fieldTypeInfos + .get(i))); + } + result = ObjectInspectorFactory.getArrayStructObjectInspector( + fieldNames, fieldObjectInspectors); + break; + } + case UNION: { + UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; + List objectTypeInfos = unionTypeInfo + .getAllUnionObjectTypeInfos(); + List fieldObjectInspectors = + new ArrayList(objectTypeInfos.size()); + for (int i = 0; i < objectTypeInfos.size(); i++) { + fieldObjectInspectors + .add(getArrayWritableObjectInspectorFromTypeInfo(objectTypeInfos + .get(i))); + } + result = ObjectInspectorFactory.getArrayUnionObjectInspector( + fieldObjectInspectors); + break; + } + + default: { + result = null; + } + } + ObjectInspector prev = + cachedArrayObjectInspector.putIfAbsent(typeInfo, result); + if (prev != null) { + result = prev; + } + } + return result; + } + static ConcurrentHashMap cachedStandardJavaObjectInspector = new ConcurrentHashMap(); diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java index 065c1fa2ed..0560e04699 100644 --- 
storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java @@ -69,6 +69,8 @@ private boolean preFlattenIsRepeating; private boolean preFlattenNoNulls; + public int flatIndex; + /** * Constructor for super-class ColumnVector. This is not called directly, * but used to initialize inherited fields. diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java index 5e5f13ddc7..cb263ecc64 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatch.java @@ -38,6 +38,7 @@ public int[] selected; // array of positions of selected values public int[] projectedColumns; public int projectionSize; + public int flatColumns; private int dataColumnCount; private int partitionColumnCount;