diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
index 2cc80e2..3e97568 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
@@ -108,6 +108,8 @@ private VectorDeserializeRow() {
 
   private static class Field {
 
+    private boolean isPrimitive;
+
     private Category category;
 
     private PrimitiveCategory primitiveCategory;
@@ -137,6 +139,7 @@ private VectorDeserializeRow() {
 
     public Field(PrimitiveCategory primitiveCategory, DataTypePhysicalVariation dataTypePhysicalVariation,
        int maxLength) {
+      isPrimitive = true;
       this.category = Category.PRIMITIVE;
       this.primitiveCategory = primitiveCategory;
       this.dataTypePhysicalVariation = dataTypePhysicalVariation;
@@ -149,6 +152,7 @@ public Field(PrimitiveCategory primitiveCategory, DataTypePhysicalVariation data
     }
 
     public Field(Category category, ComplexTypeHelper complexTypeHelper, TypeInfo typeInfo) {
+      isPrimitive = false;
       this.category = category;
       this.objectInspector = TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo);
       this.primitiveCategory = null;
@@ -159,6 +163,10 @@ public Field(Category category, ComplexTypeHelper complexTypeHelper, TypeInfo ty
       this.complexTypeHelper = complexTypeHelper;
     }
 
+    public boolean getIsPrimitive() {
+      return isPrimitive;
+    }
+
     public Category getCategory() {
       return category;
     }
@@ -336,7 +344,7 @@ private void addTopLevelConversion(int logicalColumnIndex) {
 
     final Field field = topLevelFields[logicalColumnIndex];
     field.setIsConvert(true);
-    if (field.getCategory() == Category.PRIMITIVE) {
+    if (field.getIsPrimitive()) {
       field.setConversionWritable(
           VectorizedBatchUtil.getPrimitiveWritable(field.getPrimitiveCategory()));
 
@@ -750,24 +758,25 @@ private void storeComplexFieldRowColumn(ColumnVector fieldColVector,
       return;
     }
 
-    switch (field.getCategory()) {
-    case PRIMITIVE:
+    if (field.getIsPrimitive()) {
       storePrimitiveRowColumn(fieldColVector, field, batchIndex, canRetainByteRef);
-      break;
-    case LIST:
-      storeListRowColumn(fieldColVector, field, batchIndex, canRetainByteRef);
-      break;
-    case MAP:
-      storeMapRowColumn(fieldColVector, field, batchIndex, canRetainByteRef);
-      break;
-    case STRUCT:
-      storeStructRowColumn(fieldColVector, field, batchIndex, canRetainByteRef);
-      break;
-    case UNION:
-      storeUnionRowColumn(fieldColVector, field, batchIndex, canRetainByteRef);
-      break;
-    default:
-      throw new RuntimeException("Category " + field.getCategory() + " not supported");
+    } else {
+      switch (field.getCategory()) {
+      case LIST:
+        storeListRowColumn(fieldColVector, field, batchIndex, canRetainByteRef);
+        break;
+      case MAP:
+        storeMapRowColumn(fieldColVector, field, batchIndex, canRetainByteRef);
+        break;
+      case STRUCT:
+        storeStructRowColumn(fieldColVector, field, batchIndex, canRetainByteRef);
+        break;
+      case UNION:
+        storeUnionRowColumn(fieldColVector, field, batchIndex, canRetainByteRef);
+        break;
+      default:
+        throw new RuntimeException("Category " + field.getCategory() + " not supported");
+      }
     }
 
     fieldColVector.isNull[batchIndex] = false;
@@ -905,24 +914,25 @@ private void storeRowColumn(VectorizedRowBatch batch, int batchIndex,
     final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
     final ColumnVector colVector = batch.cols[projectionColumnNum];
 
-    switch (field.getCategory()) {
-    case PRIMITIVE:
+    if (field.getIsPrimitive()) {
       storePrimitiveRowColumn(colVector, field, batchIndex, canRetainByteRef);
-      break;
-    case LIST:
-      storeListRowColumn(colVector, field, batchIndex, canRetainByteRef);
-      break;
-    case MAP:
-      storeMapRowColumn(colVector, field, batchIndex, canRetainByteRef);
-      break;
-    case STRUCT:
-      storeStructRowColumn(colVector, field, batchIndex, canRetainByteRef);
-      break;
-    case UNION:
-      storeUnionRowColumn(colVector, field, batchIndex, canRetainByteRef);
-      break;
-    default:
-      throw new RuntimeException("Category " + field.getCategory() + " not supported");
+    } else {
+      switch (field.getCategory()) {
+      case LIST:
+        storeListRowColumn(colVector, field, batchIndex, canRetainByteRef);
+        break;
+      case MAP:
+        storeMapRowColumn(colVector, field, batchIndex, canRetainByteRef);
+        break;
+      case STRUCT:
+        storeStructRowColumn(colVector, field, batchIndex, canRetainByteRef);
+        break;
+      case UNION:
+        storeUnionRowColumn(colVector, field, batchIndex, canRetainByteRef);
+        break;
+      default:
+        throw new RuntimeException("Category " + field.getCategory() + " not supported");
+      }
     }
 
     // We always set the null flag to false when there is a value.
@@ -947,24 +957,25 @@ private void convertRowColumn(VectorizedRowBatch batch, int batchIndex,
     final ColumnVector colVector = batch.cols[projectionColumnIndex];
     final Object convertSourceWritable;
 
-    switch (field.getCategory()) {
-    case PRIMITIVE:
+    if (field.getIsPrimitive()) {
       convertSourceWritable = convertPrimitiveRowColumn(batchIndex, field);
-      break;
-    case LIST:
-      convertSourceWritable = convertListRowColumn(colVector, batchIndex, field);
-      break;
-    case MAP:
-      convertSourceWritable = convertMapRowColumn(colVector, batchIndex, field);
-      break;
-    case STRUCT:
-      convertSourceWritable = convertStructRowColumn(colVector, batchIndex, field);
-      break;
-    case UNION:
-      convertSourceWritable = convertUnionRowColumn(colVector, batchIndex, field);
-      break;
-    default:
-      throw new RuntimeException();
+    } else {
+      switch (field.getCategory()) {
+      case LIST:
+        convertSourceWritable = convertListRowColumn(colVector, batchIndex, field);
+        break;
+      case MAP:
+        convertSourceWritable = convertMapRowColumn(colVector, batchIndex, field);
+        break;
+      case STRUCT:
+        convertSourceWritable = convertStructRowColumn(colVector, batchIndex, field);
+        break;
+      case UNION:
+        convertSourceWritable = convertUnionRowColumn(colVector, batchIndex, field);
+        break;
+      default:
+        throw new RuntimeException();
+      }
    }
 
     /*
@@ -984,10 +995,11 @@ private Object convertComplexFieldRowColumn(ColumnVector colVector, int batchInd
     }
     colVector.isNull[batchIndex] = false;
 
+    if (field.getIsPrimitive()) {
+      return convertPrimitiveRowColumn(batchIndex, field);
+    }
     switch (field.getCategory()) {
-    case PRIMITIVE:
-      return convertPrimitiveRowColumn(batchIndex, field);
     case LIST:
       return convertListRowColumn(colVector, batchIndex, field);
    case MAP:
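The hunks above all apply one transformation: test a boolean cached at Field construction time instead of switching on the Category enum for the overwhelmingly common PRIMITIVE case. Below is a minimal, self-contained sketch of that pattern; the names (FlagDispatchSketch, store()) are illustrative, not the actual Hive classes.

// Sketch only: models the isPrimitive fast path, not Hive code.
public class FlagDispatchSketch {

  enum Category { PRIMITIVE, LIST, MAP, STRUCT, UNION }

  static class Field {
    final boolean isPrimitive; // cached once, like Field.getIsPrimitive() above
    final Category category;

    Field(Category category) {
      this.category = category;
      this.isPrimitive = (category == Category.PRIMITIVE);
    }
  }

  static String store(Field field) {
    if (field.isPrimitive) {
      return "storePrimitiveRowColumn"; // common case: no enum switch at all
    }
    switch (field.category) { // complex types only
    case LIST:
      return "storeListRowColumn";
    case MAP:
      return "storeMapRowColumn";
    case STRUCT:
      return "storeStructRowColumn";
    case UNION:
      return "storeUnionRowColumn";
    default:
      throw new RuntimeException("Category " + field.category + " not supported");
    }
  }

  public static void main(String[] args) {
    for (Category category : Category.values()) {
      System.out.println(category + " -> " + store(new Field(category)));
    }
  }
}

Caching the comparison once per Field means the per-row hot path is a single predictable branch; the cost of the enum switch is only paid for complex types.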
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java
index 70f124e..b388baa 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java
@@ -19,7 +19,6 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import java.io.IOException;
-import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 
@@ -58,11 +57,32 @@
 
   private T serializeWrite;
 
-  private TypeInfo[] typeInfos;
+  private Field root;
 
-  private ObjectInspector[] objectInspectors;
+  private static class Field {
+    Field[] children;
 
-  private int[] outputColumnNums;
+    boolean isPrimitive;
+    Category category;
+    PrimitiveCategory primitiveCategory;
+    TypeInfo typeInfo;
+
+    int count;
+
+    ObjectInspector objectInspector;
+    int outputColumnNum;
+
+    Field() {
+      children = null;
+      isPrimitive = false;
+      category = null;
+      primitiveCategory = null;
+      typeInfo = null;
+      count = 0;
+      objectInspector = null;
+      outputColumnNum = -1;
+    }
+  }
 
   private VectorExtractRow vectorExtractRow;
@@ -76,18 +96,72 @@ public VectorSerializeRow(T serializeWrite) {
 
   private VectorSerializeRow() {
   }
 
-  public void init(List<String> typeNames, int[] columnMap) throws HiveException {
+  private Field[] createFields(TypeInfo[] typeInfos) {
+    final Field[] children = new Field[typeInfos.length];
+    for (int i = 0; i < typeInfos.length; i++) {
+      children[i] = createField(typeInfos[i]);
+    }
+    return children;
+  }
 
-    final int size = typeNames.size();
-    typeInfos = new TypeInfo[size];
-    outputColumnNums = Arrays.copyOf(columnMap, size);
-    objectInspectors = new ObjectInspector[size];
-    for (int i = 0; i < size; i++) {
-      final TypeInfo typeInfo =
-          TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i));
-      typeInfos[i] = typeInfo;
-      objectInspectors[i] =
+  private Field createField(TypeInfo typeInfo) {
+    final Field field = new Field();
+    final Category category = typeInfo.getCategory();
+    field.category = category;
+    field.typeInfo = typeInfo;
+    if (category == Category.PRIMITIVE) {
+      field.isPrimitive = true;
+      field.primitiveCategory = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
+    } else {
+      field.isPrimitive = false;
+      field.objectInspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
+      switch (category) {
+      case LIST:
+        field.children = new Field[1];
+        field.children[0] = createField(((ListTypeInfo) typeInfo).getListElementTypeInfo());
+        break;
+      case MAP:
+        field.children = new Field[2];
+        field.children[0] = createField(((MapTypeInfo) typeInfo).getMapKeyTypeInfo());
+        field.children[1] = createField(((MapTypeInfo) typeInfo).getMapValueTypeInfo());
+        break;
+      case STRUCT:
+        StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+        List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+        field.children = createFields(fieldTypeInfos.toArray(new TypeInfo[fieldTypeInfos.size()]));
+        break;
+      case UNION:
+        UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
+        List<TypeInfo> objectTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
+        field.children = createFields(objectTypeInfos.toArray(new TypeInfo[objectTypeInfos.size()]));
+        break;
+      default:
+        throw new RuntimeException();
+      }
+      field.count = field.children.length;
+    }
+    return field;
+  }
+
+  public void init(List<String> typeNames, int[] columnMap) throws HiveException {
+
+    TypeInfo[] typeInfos =
+        TypeInfoUtils.typeInfosFromTypeNames(typeNames).toArray(new TypeInfo[typeNames.size()]);
+
+    final int count = typeInfos.length;
+
+    root = new Field();
+    root.isPrimitive = false;
+    root.category = Category.STRUCT;
+    root.children = createFields(typeInfos);
+    root.count = count;
+    root.objectInspector = null;
+    int[] outputColumnNums = new int[count];
+    for (int i = 0; i < count; i++) {
+      final int outputColumnNum = columnMap[i];
+      outputColumnNums[i] = outputColumnNum;
+      root.children[i].outputColumnNum = outputColumnNum;
     }
 
     vectorExtractRow.init(typeInfos, outputColumnNums);
@@ -95,17 +169,19 @@ public void init(List<String> typeNames, int[] columnMap) throws HiveException {
 
   public void init(List<String> typeNames) throws HiveException {
 
-    final int size = typeNames.size();
-    typeInfos = new TypeInfo[size];
-    outputColumnNums = new int[size];
-    objectInspectors = new ObjectInspector[size];
-    for (int i = 0; i < size; i++) {
-      final TypeInfo typeInfo =
-          TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i));
-      typeInfos[i] = typeInfo;
-      objectInspectors[i] =
-          TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
-      outputColumnNums[i] = i;
+    TypeInfo[] typeInfos =
+        TypeInfoUtils.typeInfosFromTypeNames(typeNames).toArray(new TypeInfo[typeNames.size()]);
+
+    final int count = typeInfos.length;
+
+    root = new Field();
+    root.isPrimitive = false;
+    root.category = Category.STRUCT;
+    root.children = createFields(typeInfos);
+    root.count = count;
+    root.objectInspector = null;
+    for (int i = 0; i < count; i++) {
+      root.children[i].outputColumnNum = i;
     }
 
     vectorExtractRow.init(typeInfos);
@@ -114,36 +190,44 @@ public void init(List<String> typeNames) throws HiveException {
 
   public void init(TypeInfo[] typeInfos) throws HiveException {
 
-    final int size = typeInfos.length;
-    this.typeInfos = Arrays.copyOf(typeInfos, size);
-    outputColumnNums = new int[size];
-    objectInspectors = new ObjectInspector[size];
-    for (int i = 0; i < size; i++) {
-      objectInspectors[i] =
-          TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfos[i]);
-      outputColumnNums[i] = i;
+    final int count = typeInfos.length;
+
+    root = new Field();
+    root.isPrimitive = false;
+    root.category = Category.STRUCT;
+    root.children = createFields(typeInfos);
+    root.count = count;
+    root.objectInspector = null;
+    for (int i = 0; i < count; i++) {
+      root.children[i].outputColumnNum = i;
     }
 
-    vectorExtractRow.init(this.typeInfos, outputColumnNums);
+    vectorExtractRow.init(typeInfos);
  }
 
   public void init(TypeInfo[] typeInfos, int[] columnMap) throws HiveException {
 
-    final int size = typeInfos.length;
-    this.typeInfos = Arrays.copyOf(typeInfos, size);
-    outputColumnNums = Arrays.copyOf(columnMap, size);
-    objectInspectors = new ObjectInspector[size];
-    for (int i = 0; i < size; i++) {
-      objectInspectors[i] =
-          TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfos[i]);
+    final int count = typeInfos.length;
+
+    root = new Field();
+    root.isPrimitive = false;
+    root.category = Category.STRUCT;
+    root.children = createFields(typeInfos);
+    root.count = count;
+    root.objectInspector = null;
+    int[] outputColumnNums = new int[count];
+    for (int i = 0; i < count; i++) {
+      final int outputColumnNum = columnMap[i];
+      outputColumnNums[i] = outputColumnNum;
+      root.children[i].outputColumnNum = outputColumnNum;
     }
 
-    vectorExtractRow.init(this.typeInfos, outputColumnNums);
+    vectorExtractRow.init(typeInfos, outputColumnNums);
   }
 
   public int getCount() {
-    return typeInfos.length;
+    return root.count;
   }
 
   public void setOutput(Output output) {
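The createField()/createFields() recursion above is the heart of this file's change: the parallel typeInfos/objectInspectors/outputColumnNums arrays become a single tree of Field nodes built once at init time. Here is a hedged, compilable sketch of the same walk using only the serde TypeInfo APIs the patch itself calls; the class name FieldTreeSketch and the printTree() helper are illustrative, not part of the patch.

import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;

public class FieldTreeSketch {

  // Recurse into a TypeInfo the same way createField() does, printing the
  // tree shape instead of building Field nodes.
  static void printTree(TypeInfo typeInfo, int depth) {
    StringBuilder indent = new StringBuilder();
    for (int i = 0; i < depth; i++) {
      indent.append("  ");
    }
    final Category category = typeInfo.getCategory();
    if (category == Category.PRIMITIVE) {
      System.out.println(indent + typeInfo.getTypeName());
      return;
    }
    System.out.println(indent + category.toString());
    switch (category) {
    case LIST:
      printTree(((ListTypeInfo) typeInfo).getListElementTypeInfo(), depth + 1);
      break;
    case MAP:
      printTree(((MapTypeInfo) typeInfo).getMapKeyTypeInfo(), depth + 1);
      printTree(((MapTypeInfo) typeInfo).getMapValueTypeInfo(), depth + 1);
      break;
    case STRUCT:
      List<TypeInfo> fieldTypeInfos = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos();
      for (TypeInfo childTypeInfo : fieldTypeInfos) {
        printTree(childTypeInfo, depth + 1);
      }
      break;
    case UNION:
      for (TypeInfo childTypeInfo : ((UnionTypeInfo) typeInfo).getAllUnionObjectTypeInfos()) {
        printTree(childTypeInfo, depth + 1);
      }
      break;
    default:
      throw new RuntimeException("Category " + category + " not supported");
    }
  }

  public static void main(String[] args) {
    // Same parser the old init(List<String>) path used, one call per type name.
    TypeInfo typeInfo =
        TypeInfoUtils.getTypeInfoFromTypeString("struct<a:int,b:map<string,array<double>>>");
    printTree(typeInfo, 0);
  }
}

Building the tree once in init() moves the per-field type analysis out of the per-row path: each node already knows its children, its PrimitiveCategory, and its output column.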
@@ -165,15 +249,17 @@ public void serializeWrite(VectorizedRowBatch batch, int batchIndex) throws IOEx
     hasAnyNulls = false;
     isAllNulls = true;
-    for (int i = 0; i < typeInfos.length; i++) {
-      final ColumnVector colVector = batch.cols[outputColumnNums[i]];
-      serializeWrite(colVector, typeInfos[i], objectInspectors[i], batchIndex);
+    final Field[] children = root.children;
+    final int size = root.count;
+    for (int i = 0; i < size; i++) {
+      final Field field = children[i];
+      final ColumnVector colVector = batch.cols[field.outputColumnNum];
+      serializeWrite(colVector, field, batchIndex);
     }
   }
 
   private void serializeWrite(
-      ColumnVector colVector, TypeInfo typeInfo,
-      ObjectInspector objectInspector, int batchIndex) throws IOException {
+      ColumnVector colVector, Field field, int batchIndex) throws IOException {
 
     int adjustedBatchIndex;
     if (colVector.isRepeating) {
@@ -188,37 +274,34 @@ private void serializeWrite(
     }
     isAllNulls = false;
 
-    final Category category = typeInfo.getCategory();
+    if (field.isPrimitive) {
+      serializePrimitiveWrite(colVector, field, adjustedBatchIndex);
+      return;
+    }
+    final Category category = field.category;
     switch (category) {
-    case PRIMITIVE:
-      serializePrimitiveWrite(colVector, (PrimitiveTypeInfo) typeInfo, adjustedBatchIndex);
-      break;
     case LIST:
       serializeListWrite(
           (ListColumnVector) colVector,
-          (ListTypeInfo) typeInfo,
-          (ListObjectInspector) objectInspector,
+          field,
           adjustedBatchIndex);
       break;
     case MAP:
       serializeMapWrite(
           (MapColumnVector) colVector,
-          (MapTypeInfo) typeInfo,
-          (MapObjectInspector) objectInspector,
+          field,
           adjustedBatchIndex);
       break;
     case STRUCT:
       serializeStructWrite(
           (StructColumnVector) colVector,
-          (StructTypeInfo) typeInfo,
-          (StructObjectInspector) objectInspector,
+          field,
           adjustedBatchIndex);
       break;
     case UNION:
       serializeUnionWrite(
           (UnionColumnVector) colVector,
-          (UnionTypeInfo) typeInfo,
-          (UnionObjectInspector) objectInspector,
+          field,
           adjustedBatchIndex);
       break;
     default:
@@ -227,30 +310,33 @@ private void serializeWrite(
   }
 
   private void serializeUnionWrite(
-      UnionColumnVector colVector, UnionTypeInfo typeInfo,
-      UnionObjectInspector objectInspector, int adjustedBatchIndex) throws IOException {
+      UnionColumnVector colVector, Field field, int adjustedBatchIndex) throws IOException {
+
+    UnionTypeInfo typeInfo = (UnionTypeInfo) field.typeInfo;
+    UnionObjectInspector objectInspector = (UnionObjectInspector) field.objectInspector;
 
     final byte tag = (byte) colVector.tags[adjustedBatchIndex];
     final ColumnVector fieldColumnVector = colVector.fields[tag];
-    final TypeInfo objectTypeInfo = typeInfo.getAllUnionObjectTypeInfos().get(tag);
+    final Field childField = field.children[tag];
 
     serializeWrite.beginUnion(tag);
     serializeWrite(
         fieldColumnVector,
-        objectTypeInfo,
-        objectInspector.getObjectInspectors().get(tag),
+        childField,
         adjustedBatchIndex);
     serializeWrite.finishUnion();
   }
 
   private void serializeStructWrite(
-      StructColumnVector colVector, StructTypeInfo typeInfo,
-      StructObjectInspector objectInspector, int adjustedBatchIndex) throws IOException {
+      StructColumnVector colVector, Field field, int adjustedBatchIndex) throws IOException {
+
+    StructTypeInfo typeInfo = (StructTypeInfo) field.typeInfo;
+    StructObjectInspector objectInspector = (StructObjectInspector) field.objectInspector;
 
     final ColumnVector[] fieldColumnVectors = colVector.fields;
-    final List<TypeInfo> fieldTypeInfos = typeInfo.getAllStructFieldTypeInfos();
+    final Field[] children = field.children;
     final List<? extends StructField> structFields = objectInspector.getAllStructFieldRefs();
-    final int size = fieldTypeInfos.size();
+    final int size = field.count;
 
     final List list = (List) vectorExtractRow.extractRowColumn(
         colVector, typeInfo, objectInspector, adjustedBatchIndex);
@@ -262,21 +348,22 @@ private void serializeStructWrite(
       }
       serializeWrite(
           fieldColumnVectors[i],
-          fieldTypeInfos.get(i),
-          structFields.get(i).getFieldObjectInspector(),
+          children[i],
          adjustedBatchIndex);
     }
     serializeWrite.finishStruct();
   }
 
   private void serializeMapWrite(
-      MapColumnVector colVector, MapTypeInfo typeInfo,
-      MapObjectInspector objectInspector, int adjustedBatchIndex) throws IOException {
+      MapColumnVector colVector, Field field, int adjustedBatchIndex) throws IOException {
+
+    MapTypeInfo typeInfo = (MapTypeInfo) field.typeInfo;
+    MapObjectInspector objectInspector = (MapObjectInspector) field.objectInspector;
 
     final ColumnVector keyColumnVector = colVector.keys;
     final ColumnVector valueColumnVector = colVector.values;
-    final TypeInfo keyTypeInfo = typeInfo.getMapKeyTypeInfo();
-    final TypeInfo valueTypeInfo = typeInfo.getMapValueTypeInfo();
+    final Field keyField = field.children[0];
+    final Field valueField = field.children[1];
 
     final int offset = (int) colVector.offsets[adjustedBatchIndex];
     final int size = (int) colVector.lengths[adjustedBatchIndex];
@@ -288,21 +375,21 @@ private void serializeMapWrite(
       if (i > 0) {
         serializeWrite.separateKeyValuePair();
       }
-      serializeWrite(keyColumnVector, keyTypeInfo,
-          objectInspector.getMapKeyObjectInspector(), offset + i);
+      serializeWrite(keyColumnVector, keyField, offset + i);
       serializeWrite.separateKey();
-      serializeWrite(valueColumnVector, valueTypeInfo,
-          objectInspector.getMapValueObjectInspector(), offset + i);
+      serializeWrite(valueColumnVector, valueField, offset + i);
     }
     serializeWrite.finishMap();
   }
 
   private void serializeListWrite(
-      ListColumnVector colVector, ListTypeInfo typeInfo,
-      ListObjectInspector objectInspector, int adjustedBatchIndex) throws IOException {
+      ListColumnVector colVector, Field field, int adjustedBatchIndex) throws IOException {
+
+    final ListTypeInfo typeInfo = (ListTypeInfo) field.typeInfo;
+    final ListObjectInspector objectInspector = (ListObjectInspector) field.objectInspector;
 
     final ColumnVector childColumnVector = colVector.child;
-    final TypeInfo elementTypeInfo = typeInfo.getListElementTypeInfo();
+    final Field elementField = field.children[0];
 
     final int offset = (int) colVector.offsets[adjustedBatchIndex];
     final int size = (int) colVector.lengths[adjustedBatchIndex];
@@ -316,15 +403,15 @@ private void serializeListWrite(
         serializeWrite.separateList();
       }
       serializeWrite(
-          childColumnVector, elementTypeInfo, elementObjectInspector, offset + i);
+          childColumnVector, elementField, offset + i);
     }
     serializeWrite.finishList();
  }
 
   private void serializePrimitiveWrite(
-      ColumnVector colVector, PrimitiveTypeInfo typeInfo, int adjustedBatchIndex) throws IOException {
+      ColumnVector colVector, Field field, int adjustedBatchIndex) throws IOException {
 
-    final PrimitiveCategory primitiveCategory = typeInfo.getPrimitiveCategory();
+    final PrimitiveCategory primitiveCategory = field.primitiveCategory;
     switch (primitiveCategory) {
     case BOOLEAN:
       serializeWrite.writeBoolean(((LongColumnVector) colVector).vector[adjustedBatchIndex] != 0);
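The offset/length walk in serializeMapWrite() and serializeListWrite() above is easy to misread: each row's collection is the half-open slice [offset, offset + length) of a flattened child vector. Below is a standalone model of that traversal with the separator call sites marked; the arrays are made-up sample data, not Hive's ColumnVector classes, and null/isRepeating handling is omitted.

public class ListVectorSketch {
  public static void main(String[] args) {
    // Flattened child values for three rows: [10, 20], [], [30, 40, 50].
    long[] child = {10, 20, 30, 40, 50};
    long[] offsets = {0, 2, 2};
    long[] lengths = {2, 0, 3};

    for (int batchIndex = 0; batchIndex < offsets.length; batchIndex++) {
      // Same reads as serializeListWrite(): one slice per row.
      final int offset = (int) offsets[batchIndex];
      final int size = (int) lengths[batchIndex];
      StringBuilder sb = new StringBuilder("[");
      for (int i = 0; i < size; i++) {
        if (i > 0) {
          sb.append(", "); // where serializeWrite.separateList() is called
        }
        sb.append(child[offset + i]);
      }
      System.out.println(sb.append("]"));
    }
  }
}

The same slice drives the map case; it simply reads two child vectors (keys and values) at the same offset + i index, with separateKey()/separateKeyValuePair() between them.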