commit 39d1de3abb2e6d5ed4faa7069f1b164b00a8014d Author: Owen O'Malley Date: Fri Jul 24 14:10:15 2015 -0700 HIVE-11212. Implement compound types for vectorization. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 46c2a78..d547048 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -2296,36 +2296,51 @@ public static String mapTypeNameSynonyms(String typeName) { } public static ColumnVector.Type getColumnVectorTypeFromTypeInfo(TypeInfo typeInfo) throws HiveException { - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; - PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); - - switch (primitiveCategory) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - case DATE: - case TIMESTAMP: - case INTERVAL_YEAR_MONTH: - case INTERVAL_DAY_TIME: - return ColumnVector.Type.LONG; - - case FLOAT: - case DOUBLE: - return ColumnVector.Type.DOUBLE; - - case STRING: - case CHAR: - case VARCHAR: - case BINARY: - return ColumnVector.Type.BYTES; - - case DECIMAL: - return ColumnVector.Type.DECIMAL; - - default: - throw new HiveException("Unexpected primitive type category " + primitiveCategory); + switch (typeInfo.getCategory()) { + case STRUCT: + return Type.STRUCT; + case UNION: + return Type.UNION; + case LIST: + return Type.LIST; + case MAP: + return Type.MAP; + case PRIMITIVE: { + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + + switch (primitiveCategory) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + case DATE: + case TIMESTAMP: + case INTERVAL_YEAR_MONTH: + case INTERVAL_DAY_TIME: + return ColumnVector.Type.LONG; + + case FLOAT: + case DOUBLE: + return 
ColumnVector.Type.DOUBLE; + + case STRING: + case CHAR: + case VARCHAR: + case BINARY: + return ColumnVector.Type.BYTES; + + case DECIMAL: + return ColumnVector.Type.DECIMAL; + + default: + throw new HiveException("Unexpected primitive type category " + primitiveCategory); + } + } + default: + throw new HiveException("Unexpected type category " + + typeInfo.getCategory()); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java index 3780113..898fdd7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java @@ -45,6 +45,8 @@ import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; @@ -52,6 +54,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -112,6 +115,80 @@ public static void setBatchSize(VectorizedRowBatch batch, int size) { } /** + * Convert an ObjectInspector into a ColumnVector of the appropriate + * type. 
+ */ + public static ColumnVector createColumnVector(ObjectInspector inspector + ) throws HiveException { + switch(inspector.getCategory()) { + case PRIMITIVE: + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) inspector; + switch(poi.getPrimitiveCategory()) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + case TIMESTAMP: + case DATE: + case INTERVAL_YEAR_MONTH: + case INTERVAL_DAY_TIME: + return new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case FLOAT: + case DOUBLE: + return new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case BINARY: + case STRING: + case CHAR: + case VARCHAR: + return new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); + case DECIMAL: + DecimalTypeInfo tInfo = (DecimalTypeInfo) poi.getTypeInfo(); + return new DecimalColumnVector(VectorizedRowBatch.DEFAULT_SIZE, + tInfo.precision(), tInfo.scale()); + default: + throw new HiveException("Vectorizaton is not supported for datatype:" + + poi.getPrimitiveCategory()); + } + case STRUCT: { + StructObjectInspector soi = (StructObjectInspector) inspector; + List fieldList = soi.getAllStructFieldRefs(); + ColumnVector[] children = new ColumnVector[fieldList.size()]; + for(int i=0; i < children.length; ++i) { + children[i] = + createColumnVector(fieldList.get(i).getFieldObjectInspector()); + } + return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, + children); + } + case UNION: { + UnionObjectInspector uoi = (UnionObjectInspector) inspector; + List fieldList = uoi.getObjectInspectors(); + ColumnVector[] children = new ColumnVector[fieldList.size()]; + for(int i=0; i < children.length; ++i) { + children[i] = createColumnVector(fieldList.get(i)); + } + return new UnionColumnVector(VectorizedRowBatch.DEFAULT_SIZE, children); + } + case LIST: { + ListObjectInspector loi = (ListObjectInspector) inspector; + return new ListColumnVector(VectorizedRowBatch.DEFAULT_SIZE, + createColumnVector(loi.getListElementObjectInspector())); + } + case MAP: { 
+ MapObjectInspector moi = (MapObjectInspector) inspector; + return new MapColumnVector(VectorizedRowBatch.DEFAULT_SIZE, + createColumnVector(moi.getMapKeyObjectInspector()), + createColumnVector(moi.getMapValueObjectInspector())); + } + default: + throw new HiveException("Vectorization is not supported for datatype:" + + inspector.getCategory()); + } + + } + + /** * Walk through the object inspector and add column vectors * * @param oi @@ -129,47 +206,7 @@ private static void allocateColumnVector(StructObjectInspector oi, final List fields = oi.getAllStructFieldRefs(); for(StructField field : fields) { ObjectInspector fieldObjectInspector = field.getFieldObjectInspector(); - switch(fieldObjectInspector.getCategory()) { - case PRIMITIVE: - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) fieldObjectInspector; - switch(poi.getPrimitiveCategory()) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - case TIMESTAMP: - case DATE: - case INTERVAL_YEAR_MONTH: - case INTERVAL_DAY_TIME: - cvList.add(new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE)); - break; - case FLOAT: - case DOUBLE: - cvList.add(new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE)); - break; - case BINARY: - case STRING: - case CHAR: - case VARCHAR: - cvList.add(new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE)); - break; - case DECIMAL: - DecimalTypeInfo tInfo = (DecimalTypeInfo) poi.getTypeInfo(); - cvList.add(new DecimalColumnVector(VectorizedRowBatch.DEFAULT_SIZE, - tInfo.precision(), tInfo.scale())); - break; - default: - throw new HiveException("Vectorizaton is not supported for datatype:" - + poi.getPrimitiveCategory()); - } - break; - case STRUCT: - throw new HiveException("Struct not supported"); - default: - throw new HiveException("Flattening is not supported for datatype:" - + fieldObjectInspector.getCategory()); - } + cvList.add(createColumnVector(fieldObjectInspector)); } } @@ -611,6 +648,48 @@ public static StandardStructObjectInspector 
convertToStandardStructObjectInspect return result; } + static ColumnVector cloneColumnVector(ColumnVector source + ) throws HiveException{ + if (source instanceof LongColumnVector) { + return new LongColumnVector(((LongColumnVector) source).vector.length); + } else if (source instanceof DoubleColumnVector) { + return new DoubleColumnVector(((DoubleColumnVector) source).vector.length); + } else if (source instanceof BytesColumnVector) { + return new BytesColumnVector(((BytesColumnVector) source).vector.length); + } else if (source instanceof DecimalColumnVector) { + DecimalColumnVector decColVector = (DecimalColumnVector) source; + return new DecimalColumnVector(decColVector.vector.length, + decColVector.precision, + decColVector.scale); + } else if (source instanceof ListColumnVector) { + ListColumnVector src = (ListColumnVector) source; + ColumnVector child = cloneColumnVector(src.child); + return new ListColumnVector(src.offsets.length, child); + } else if (source instanceof MapColumnVector) { + MapColumnVector src = (MapColumnVector) source; + ColumnVector keys = cloneColumnVector(src.keys); + ColumnVector values = cloneColumnVector(src.values); + return new MapColumnVector(src.offsets.length, keys, values); + } else if (source instanceof StructColumnVector) { + StructColumnVector src = (StructColumnVector) source; + ColumnVector[] copy = new ColumnVector[src.fields.length]; + for(int i=0; i < copy.length; ++i) { + copy[i] = cloneColumnVector(src.fields[i]); + } + return new StructColumnVector(VectorizedRowBatch.DEFAULT_SIZE, copy); + } else if (source instanceof UnionColumnVector) { + UnionColumnVector src = (UnionColumnVector) source; + ColumnVector[] copy = new ColumnVector[src.fields.length]; + for(int i=0; i < copy.length; ++i) { + copy[i] = cloneColumnVector(src.fields[i]); + } + return new UnionColumnVector(src.tags.length, copy); + } else + throw new HiveException("Column vector class " + + source.getClass().getName() + + " is not supported!"); + } + 
/** * Make a new (scratch) batch, which is exactly "like" the batch provided, except that it's empty * @param batch the batch to imitate @@ -620,27 +699,13 @@ public static StandardStructObjectInspector convertToStandardStructObjectInspect public static VectorizedRowBatch makeLike(VectorizedRowBatch batch) throws HiveException { VectorizedRowBatch newBatch = new VectorizedRowBatch(batch.numCols); for (int i = 0; i < batch.numCols; i++) { - ColumnVector colVector = batch.cols[i]; - if (colVector != null) { - ColumnVector newColVector; - if (colVector instanceof LongColumnVector) { - newColVector = new LongColumnVector(); - } else if (colVector instanceof DoubleColumnVector) { - newColVector = new DoubleColumnVector(); - } else if (colVector instanceof BytesColumnVector) { - newColVector = new BytesColumnVector(); - } else if (colVector instanceof DecimalColumnVector) { - DecimalColumnVector decColVector = (DecimalColumnVector) colVector; - newColVector = new DecimalColumnVector(decColVector.precision, decColVector.scale); - } else { - throw new HiveException("Column vector class " + colVector.getClass().getName() + - " is not supported!"); - } - newBatch.cols[i] = newColVector; + if (batch.cols[i] != null) { + newBatch.cols[i] = cloneColumnVector(batch.cols[i]); newBatch.cols[i].init(); } } - newBatch.projectedColumns = Arrays.copyOf(batch.projectedColumns, batch.projectedColumns.length); + newBatch.projectedColumns = Arrays.copyOf(batch.projectedColumns, + batch.projectedColumns.length); newBatch.projectionSize = batch.projectionSize; newBatch.reset(); return newBatch; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index 82d4a8f..4767636 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -289,54 +289,7 @@ public VectorizedRowBatch 
createVectorizedRowBatch() throws HiveException || ((partitionValues != null) && partitionValues.containsKey(fieldRefs.get(j).getFieldName()))) { ObjectInspector foi = fieldRefs.get(j).getFieldObjectInspector(); - switch (foi.getCategory()) { - case PRIMITIVE: { - PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi; - // Vectorization currently only supports the following data types: - // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, BINARY, STRING, CHAR, VARCHAR, TIMESTAMP, - // DATE and DECIMAL - switch (poi.getPrimitiveCategory()) { - case BOOLEAN: - case BYTE: - case SHORT: - case INT: - case LONG: - case TIMESTAMP: - case DATE: - case INTERVAL_YEAR_MONTH: - case INTERVAL_DAY_TIME: - result.cols[j] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE); - break; - case FLOAT: - case DOUBLE: - result.cols[j] = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE); - break; - case BINARY: - case STRING: - case CHAR: - case VARCHAR: - result.cols[j] = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE); - break; - case DECIMAL: - DecimalTypeInfo tInfo = (DecimalTypeInfo) poi.getTypeInfo(); - result.cols[j] = new DecimalColumnVector(VectorizedRowBatch.DEFAULT_SIZE, - tInfo.precision(), tInfo.scale()); - break; - default: - throw new RuntimeException("Vectorizaton is not supported for datatype:" - + poi.getPrimitiveCategory()); - } - break; - } - case LIST: - case MAP: - case STRUCT: - case UNION: - throw new HiveException("Vectorizaton is not supported for datatype:" - + foi.getCategory()); - default: - throw new HiveException("Unknown ObjectInspector category!"); - } + result.cols[j] = VectorizedBatchUtil.createColumnVector(foi); } } result.numCols = fieldRefs.size(); diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java index 02c52fa..6587c80 100644 --- 
storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java @@ -297,8 +297,18 @@ public void fill(byte[] value) { @Override public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { - BytesColumnVector in = (BytesColumnVector) inputVector; - setVal(outElementNum, in.vector[inputElementNum], in.start[inputElementNum], in.length[inputElementNum]); + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = false; + BytesColumnVector in = (BytesColumnVector) inputVector; + setVal(outElementNum, in.vector[inputElementNum], + in.start[inputElementNum], in.length[inputElementNum]); + } else { + isNull[outElementNum] = true; + noNulls = false; + } } @Override @@ -319,4 +329,28 @@ public void stringifyValue(StringBuilder buffer, int row) { buffer.append("null"); } } + + @Override + public void ensureSize(int size, boolean preserveData) { + if (size > vector.length) { + super.ensureSize(size, preserveData); + int[] oldStart = start; + start = new int[size]; + int[] oldLength = length; + length = new int[size]; + byte[][] oldVector = vector; + vector = new byte[size][]; + if (preserveData) { + if (isRepeating) { + vector[0] = oldVector[0]; + start[0] = oldStart[0]; + length[0] = oldLength[0]; + } else { + System.arraycopy(oldVector, 0, vector, 0, oldVector.length); + System.arraycopy(oldStart, 0, start, 0 , oldStart.length); + System.arraycopy(oldLength, 0, length, 0, oldLength.length); + } + } + } + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java index cb75c2c..13c40fe 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java +++ 
storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ColumnVector.java @@ -35,11 +35,15 @@ /* * The current kinds of column vectors. */ - public static enum Type { + public enum Type { LONG, DOUBLE, BYTES, - DECIMAL + DECIMAL, + STRUCT, + LIST, + MAP, + UNION } /* @@ -72,6 +76,8 @@ public ColumnVector(int len) { isNull = new boolean[len]; noNulls = true; isRepeating = false; + preFlattenNoNulls = true; + preFlattenIsRepeating = false; } /** @@ -81,11 +87,13 @@ public ColumnVector(int len) { * - sets isRepeating to false */ public void reset() { - if (false == noNulls) { + if (!noNulls) { Arrays.fill(isNull, false); } noNulls = true; isRepeating = false; + preFlattenNoNulls = true; + preFlattenIsRepeating = false; } abstract public void flatten(boolean selectedInUse, int[] sel, int size); @@ -93,7 +101,8 @@ public void reset() { // Simplify vector by brute-force flattening noNulls if isRepeating // This can be used to reduce combinatorial explosion of code paths in VectorExpressions // with many arguments. - public void flattenRepeatingNulls(boolean selectedInUse, int[] sel, int size) { + protected void flattenRepeatingNulls(boolean selectedInUse, int[] sel, + int size) { boolean nullFillValue; @@ -116,13 +125,13 @@ public void flattenRepeatingNulls(boolean selectedInUse, int[] sel, int size) { noNulls = false; } - public void flattenNoNulls(boolean selectedInUse, int[] sel, int size) { + protected void flattenNoNulls(boolean selectedInUse, int[] sel, + int size) { if (noNulls) { noNulls = false; if (selectedInUse) { for (int j = 0; j < size; j++) { - int i = sel[j]; - isNull[i] = false; + isNull[sel[j]] = false; } } else { Arrays.fill(isNull, 0, size, false); @@ -151,8 +160,10 @@ protected void flattenPush() { /** * Set the element in this column vector from the given input vector. + * This method can assume that the output does not have isRepeating set. 
*/ - public abstract void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector); + public abstract void setElement(int outElementNum, int inputElementNum, + ColumnVector inputVector); /** * Initialize the column vector. This method can be overridden by specific column vector types. @@ -164,6 +175,27 @@ public void init() { } /** + * Ensure the ColumnVector can hold at least size values. + * This method is deliberately *not* recursive because the complex types + * can easily have more (or less) children than the upper levels. + * @param size the new minimum size + * @param presesrveData should the old data be preserved? + */ + public void ensureSize(int size, boolean presesrveData) { + if (isNull.length < size) { + boolean[] oldArray = isNull; + isNull = new boolean[size]; + if (presesrveData && !noNulls) { + if (isRepeating) { + isNull[0] = oldArray[0]; + } else { + System.arraycopy(oldArray, 0, isNull, 0, oldArray.length); + } + } + } + } + + /** * Print the value for this column into the given string builder. 
* @param buffer the buffer to print into * @param row the id of the row to print diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java index 74a9d5f..ccd30e2 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DecimalColumnVector.java @@ -57,12 +57,23 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { @Override public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { - HiveDecimal hiveDec = ((DecimalColumnVector) inputVector).vector[inputElementNum].getHiveDecimal(precision, scale); - if (hiveDec == null) { - noNulls = false; - isNull[outElementNum] = true; + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { + HiveDecimal hiveDec = + ((DecimalColumnVector) inputVector).vector[inputElementNum] + .getHiveDecimal(precision, scale); + if (hiveDec == null) { + isNull[outElementNum] = true; + noNulls = false; + } else { + isNull[outElementNum] = false; + vector[outElementNum].set(hiveDec); + } } else { - vector[outElementNum].set(hiveDec); + isNull[outElementNum] = true; + noNulls = false; } } @@ -103,4 +114,20 @@ public void setNullDataValue(int elementNum) { HiveDecimal minimumNonZeroValue = HiveDecimal.create(BigInteger.ONE, scale); vector[elementNum].set(minimumNonZeroValue); } + + @Override + public void ensureSize(int size, boolean preserveData) { + if (size > vector.length) { + super.ensureSize(size, preserveData); + HiveDecimalWritable[] oldArray = vector; + vector = new HiveDecimalWritable[size]; + if (preserveData) { + // we copy all of the values to avoid creating more objects + System.arraycopy(oldArray, 0, vector, 0 , oldArray.length); + for(int i= oldArray.length; i < vector.length; ++i) { + vector[i] = 
new HiveDecimalWritable(HiveDecimal.ZERO); + } + } + } + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java index 4a7811d..4351d15 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/DoubleColumnVector.java @@ -126,7 +126,17 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { @Override public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { - vector[outElementNum] = ((DoubleColumnVector) inputVector).vector[inputElementNum]; + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = false; + vector[outElementNum] = + ((DoubleColumnVector) inputVector).vector[inputElementNum]; + } else { + isNull[outElementNum] = true; + noNulls = false; + } } @Override @@ -140,4 +150,20 @@ public void stringifyValue(StringBuilder buffer, int row) { buffer.append("null"); } } + + @Override + public void ensureSize(int size, boolean preserveData) { + if (size > vector.length) { + super.ensureSize(size, preserveData); + double[] oldArray = vector; + vector = new double[size]; + if (preserveData) { + if (isRepeating) { + vector[0] = oldArray[0]; + } else { + System.arraycopy(oldArray, 0, vector, 0 , oldArray.length); + } + } + } + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java new file mode 100644 index 0000000..66240dd --- /dev/null +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/ListColumnVector.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +/** + * The representation of a vectorized column of list objects. + * + * Each list is composed of a range of elements in the underlying child + * ColumnVector. The range for list i is + * offsets[i]..offsets[i]+lengths[i]-1 inclusive. + */ +public class ListColumnVector extends MultiValuedColumnVector { + + public ColumnVector child; + + public ListColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE, null); + } + + /** + * Constructor for ListColumnVector. 
+ * + * @param len Vector length + * @param child The child vector + */ + public ListColumnVector(int len, ColumnVector child) { + super(len); + this.child = child; + } + + @Override + protected void childFlatten(boolean useSelected, int[] selected, int size) { + child.flatten(useSelected, selected, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, + ColumnVector inputVector) { + ListColumnVector input = (ListColumnVector) inputVector; + if (input.isRepeating) { + inputElementNum = 0; + } + if (!input.noNulls && input.isNull[inputElementNum]) { + isNull[outElementNum] = true; + noNulls = false; + } else { + isNull[outElementNum] = false; + int offset = childCount; + int length = (int) input.lengths[inputElementNum]; + int inputOffset = (int) input.offsets[inputElementNum]; + offsets[outElementNum] = offset; + childCount += length; + lengths[outElementNum] = length; + child.ensureSize(childCount, true); + for (int i = 0; i < length; ++i) { + child.setElement(i + offset, inputOffset + i, input.child); + } + } + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append('['); + boolean isFirst = true; + for(long i=offsets[row]; i < offsets[row] + lengths[row]; ++i) { + if (isFirst) { + isFirst = false; + } else { + buffer.append(", "); + } + child.stringifyValue(buffer, (int) i); + } + buffer.append(']'); + } else { + buffer.append("null"); + } + } + + @Override + public void init() { + super.init(); + child.init(); + } + + @Override + public void reset() { + super.reset(); + child.reset(); + } + + @Override + public void unFlatten() { + super.unFlatten(); + if (!isRepeating || noNulls || !isNull[0]) { + child.unFlatten(); + } + } + +} diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java index 
5702584..c81f13b 100644 --- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/LongColumnVector.java @@ -172,7 +172,17 @@ public void flatten(boolean selectedInUse, int[] sel, int size) { @Override public void setElement(int outElementNum, int inputElementNum, ColumnVector inputVector) { - vector[outElementNum] = ((LongColumnVector) inputVector).vector[inputElementNum]; + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = false; + vector[outElementNum] = + ((LongColumnVector) inputVector).vector[inputElementNum]; + } else { + isNull[outElementNum] = true; + noNulls = false; + } } @Override @@ -186,4 +196,20 @@ public void stringifyValue(StringBuilder buffer, int row) { buffer.append("null"); } } + + @Override + public void ensureSize(int size, boolean preserveData) { + if (size > vector.length) { + super.ensureSize(size, preserveData); + long[] oldArray = vector; + vector = new long[size]; + if (preserveData) { + if (isRepeating) { + vector[0] = oldArray[0]; + } else { + System.arraycopy(oldArray, 0, vector, 0 , oldArray.length); + } + } + } + } } diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java new file mode 100644 index 0000000..e8421e3 --- /dev/null +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/MapColumnVector.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +/** + * The representation of a vectorized column of map objects. + * + * Each map is composed of a range of elements in the underlying child + * ColumnVector. The range for map i is + * offsets[i]..offsets[i]+lengths[i]-1 inclusive. + */ +public class MapColumnVector extends MultiValuedColumnVector { + + public ColumnVector keys; + public ColumnVector values; + + public MapColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE, null, null); + } + + /** + * Constructor for MapColumnVector + * + * @param len Vector length + * @param keys The keys column vector + * @param values The values column vector + */ + public MapColumnVector(int len, ColumnVector keys, ColumnVector values) { + super(len); + this.keys = keys; + this.values = values; + } + + @Override + protected void childFlatten(boolean useSelected, int[] selected, int size) { + keys.flatten(useSelected, selected, size); + values.flatten(useSelected, selected, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, + ColumnVector inputVector) { + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (!inputVector.noNulls && inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = true; + noNulls = false; + } else { + MapColumnVector input = (MapColumnVector) inputVector; + isNull[outElementNum] = false; + int offset = childCount; + int length = (int) input.lengths[inputElementNum]; + int inputOffset = (int) input.offsets[inputElementNum]; + offsets[outElementNum] = offset; + childCount += 
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector;

import java.util.Arrays;

/**
 * The representation of a vectorized column of multi-valued objects, such
 * as lists and maps.
 *
 * Each object is composed of a range of elements in the underlying child
 * ColumnVector. The range for list i is
 * offsets[i]..offsets[i]+lengths[i]-1 inclusive.
 */
public abstract class MultiValuedColumnVector extends ColumnVector {

  // offsets[row] is the index in the child vector(s) where row's elements
  // begin.
  public long[] offsets;
  // lengths[row] is the number of child elements belonging to row.
  public long[] lengths;
  // the number of children slots used
  public int childCount;

  /**
   * Constructor for MultiValuedColumnVector.
   *
   * @param len Vector length
   */
  public MultiValuedColumnVector(int len) {
    super(len);
    childCount = 0;
    offsets = new long[len];
    lengths = new long[len];
  }

  /**
   * Flatten the child vector(s) of this compound vector.
   * Implemented by subclasses because only they know how many child
   * vectors there are (one for lists, two for maps).
   *
   * @param useSelected whether {@code selected} should be consulted
   * @param selected child-level indices to flatten when {@code useSelected}
   * @param size number of child entries (or selections) to flatten
   */
  protected abstract void childFlatten(boolean useSelected, int[] selected,
                                       int size);

  @Override
  public void flatten(boolean selectedInUse, int[] sel, int size) {
    flattenPush();

    if (isRepeating) {
      if (noNulls || !isNull[0]) {
        // Materialize the repeated offset/length into every (selected) row
        // so readers no longer need to consult isRepeating.
        if (selectedInUse) {
          for (int i = 0; i < size; ++i) {
            int row = sel[i];
            offsets[row] = offsets[0];
            lengths[row] = lengths[0];
            isNull[row] = false;
          }
        } else {
          Arrays.fill(offsets, 0, size, offsets[0]);
          Arrays.fill(lengths, 0, size, lengths[0]);
          Arrays.fill(isNull, 0, size, false);
        }
        // We optimize by assuming that a repeating list/map will run from
        // from 0 .. lengths[0] in the child vector.
        // Sanity check the assumption that we can start at 0.
        if (offsets[0] != 0) {
          throw new IllegalArgumentException("Repeating offset isn't 0, but "
              + offsets[0]);
        }
        childFlatten(false, null, (int) lengths[0]);
      } else {
        // Repeating null: only the null flags need to be spread; the child
        // vectors hold no live data for these rows and are left untouched.
        if (selectedInUse) {
          for(int i=0; i < size; ++i) {
            isNull[sel[i]] = true;
          }
        } else {
          Arrays.fill(isNull, 0, size, true);
        }
      }
      isRepeating = false;
      noNulls = false;
    } else {
      if (selectedInUse) {
        // Translate the row-level selection into a child-level selection
        // that covers exactly the elements of the selected rows.
        int childSize = 0;
        for(int i=0; i < size; ++i) {
          childSize += lengths[sel[i]];
        }
        int[] childSelection = new int[childSize];
        int idx = 0;
        for(int i=0; i < size; ++i) {
          int row = sel[i];
          for(int elem=0; elem < lengths[row]; ++elem) {
            childSelection[idx++] = (int) (offsets[row] + elem);
          }
        }
        childFlatten(true, childSelection, childSize);
      } else {
        childFlatten(false, null, childCount);
      }
      flattenNoNulls(selectedInUse, sel, size);
    }
  }

  @Override
  public void ensureSize(int size, boolean preserveData) {
    // Only grows; shrinking is never performed.
    if (size > offsets.length) {
      super.ensureSize(size, preserveData);
      long[] oldOffsets = offsets;
      offsets = new long[size];
      long oldLengths[] = lengths;
      lengths = new long[size];
      if (preserveData) {
        if (isRepeating) {
          // Only slot 0 is meaningful for a repeating vector.
          offsets[0] = oldOffsets[0];
          lengths[0] = oldLengths[0];
        } else {
          System.arraycopy(oldOffsets, 0, offsets, 0 , oldOffsets.length);
          System.arraycopy(oldLengths, 0, lengths, 0, oldLengths.length);
        }
      }
    }
  }

  /**
   * Initialize the vector.
   */
  @Override
  public void init() {
    super.init();
    childCount = 0;
  }

  /**
   * Reset the vector for the next batch.
   */
  @Override
  public void reset() {
    super.reset();
    childCount = 0;
  }

}
+ */ + @Override + public void reset() { + super.reset(); + childCount = 0; + } + +} diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java new file mode 100644 index 0000000..f7c8b05 --- /dev/null +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/StructColumnVector.java @@ -0,0 +1,124 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +/** + * The representation of a vectorized column of struct objects. + * + * Each field is represented by a separate inner ColumnVector. Since this + * ColumnVector doesn't own any per row data other that the isNull flag, the + * isRepeating only covers the isNull array. + */ +public class StructColumnVector extends ColumnVector { + + public ColumnVector[] fields; + + public StructColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE); + } + + /** + * Constructor for StructColumnVector + * + * @param len Vector length + * @param fields the field column vectors + */ + public StructColumnVector(int len, ColumnVector... 
fields) { + super(len); + this.fields = fields; + } + + @Override + public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + for(int i=0; i < fields.length; ++i) { + fields[i].flatten(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, + ColumnVector inputVector) { + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = false; + ColumnVector[] inputFields = ((StructColumnVector) inputVector).fields; + for (int i = 0; i < inputFields.length; ++i) { + fields[i].setElement(outElementNum, inputElementNum, inputFields[i]); + } + } else { + noNulls = false; + isNull[outElementNum] = true; + } + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append('['); + for(int i=0; i < fields.length; ++i) { + if (i != 0) { + buffer.append(", "); + } + fields[i].stringifyValue(buffer, row); + } + buffer.append(']'); + } else { + buffer.append("null"); + } + } + + @Override + public void ensureSize(int size, boolean preserveData) { + super.ensureSize(size, preserveData); + for(int i=0; i < fields.length; ++i) { + fields[i].ensureSize(size, preserveData); + } + } + + @Override + public void reset() { + super.reset(); + for(int i =0; i < fields.length; ++i) { + fields[i].reset(); + } + } + + @Override + public void init() { + super.init(); + for(int i =0; i < fields.length; ++i) { + fields[i].init(); + } + } + + @Override + public void unFlatten() { + super.unFlatten(); + for(int i=0; i < fields.length; ++i) { + fields[i].unFlatten(); + } + } +} diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java new file mode 100644 index 
0000000..2b3b013 --- /dev/null +++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/UnionColumnVector.java @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector; + +import java.util.Arrays; + +/** + * The representation of a vectorized column of struct objects. + * + * Each field is represented by a separate inner ColumnVector. Since this + * ColumnVector doesn't own any per row data other that the isNull flag, the + * isRepeating only covers the isNull array. + */ +public class UnionColumnVector extends ColumnVector { + + public int[] tags; + public ColumnVector[] fields; + + public UnionColumnVector() { + this(VectorizedRowBatch.DEFAULT_SIZE); + } + + /** + * Constructor for UnionColumnVector + * + * @param len Vector length + * @param fields the field column vectors + */ + public UnionColumnVector(int len, ColumnVector... 
fields) { + super(len); + tags = new int[len]; + this.fields = fields; + } + + @Override + public void flatten(boolean selectedInUse, int[] sel, int size) { + flattenPush(); + for(int i=0; i < fields.length; ++i) { + fields[i].flatten(selectedInUse, sel, size); + } + flattenNoNulls(selectedInUse, sel, size); + } + + @Override + public void setElement(int outElementNum, int inputElementNum, + ColumnVector inputVector) { + if (inputVector.isRepeating) { + inputElementNum = 0; + } + if (inputVector.noNulls || !inputVector.isNull[inputElementNum]) { + isNull[outElementNum] = false; + UnionColumnVector input = (UnionColumnVector) inputVector; + tags[outElementNum] = input.tags[inputElementNum]; + fields[tags[outElementNum]].setElement(outElementNum, inputElementNum, + input.fields[tags[outElementNum]]); + } else { + noNulls = false; + isNull[outElementNum] = true; + } + } + + @Override + public void stringifyValue(StringBuilder buffer, int row) { + if (isRepeating) { + row = 0; + } + if (noNulls || !isNull[row]) { + buffer.append("{\"tag\": "); + buffer.append(tags[row]); + buffer.append(", \"value\": "); + fields[tags[row]].stringifyValue(buffer, row); + buffer.append('}'); + } else { + buffer.append("null"); + } + } + + @Override + public void ensureSize(int size, boolean preserveData) { + super.ensureSize(size, preserveData); + if (tags.length < size) { + if (preserveData) { + int[] oldTags = tags; + tags = new int[size]; + System.arraycopy(oldTags, 0, tags, 0, oldTags.length); + } else { + tags = new int[size]; + } + for(int i=0; i < fields.length; ++i) { + fields[i].ensureSize(size, preserveData); + } + } + } + + @Override + public void reset() { + super.reset(); + for(int i =0; i < fields.length; ++i) { + fields[i].reset(); + } + } + + @Override + public void init() { + super.init(); + for(int i =0; i < fields.length; ++i) { + fields[i].init(); + } + } + + @Override + public void unFlatten() { + super.unFlatten(); + for(int i=0; i < fields.length; ++i) { + 
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector;

import org.junit.Test;

import java.util.Arrays;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

/**
 * Test for ListColumnVector: covers flatten/unFlatten across the
 * repeating/null/selection combinations, and setElement copying between
 * two list vectors.
 */
public class TestListColumnVector {

  /** Exercises flatten/unFlatten for all repeating/null/selection cases. */
  @Test
  public void testFlatten() throws Exception {
    LongColumnVector col1 = new LongColumnVector(10);
    ListColumnVector vector = new ListColumnVector(10, col1);
    vector.init();

    // TEST - repeating NULL & no selection
    col1.isRepeating = true;
    vector.isRepeating = true;
    vector.noNulls = false;
    vector.isNull[0] = true;
    vector.childCount = 0;
    for(int i=0; i < 10; ++i) {
      col1.vector[i] = i + 3;
      vector.offsets[i] = i;
      vector.lengths[i] = 10 + i;
    }
    vector.flatten(false, null, 10);
    // make sure the vector was flattened
    assertFalse(vector.isRepeating);
    assertFalse(vector.noNulls);
    // child isn't flattened, because parent is repeating null
    assertTrue(col1.isRepeating);
    assertTrue(col1.noNulls);
    for(int i=0; i < 10; ++i) {
      assertTrue("isNull at " + i, vector.isNull[i]);
    }
    for(int i=0; i < 10; ++i) {
      StringBuilder buf = new StringBuilder();
      vector.stringifyValue(buf, i);
      assertEquals("null", buf.toString());
    }
    vector.unFlatten();
    assertTrue(col1.isRepeating);
    assertTrue(vector.isRepeating);

    // TEST - repeating NULL & selection
    Arrays.fill(vector.isNull, 1, 10, false);
    int[] sel = new int[]{3, 5, 7};
    vector.flatten(true, sel, 3);
    // only the selected rows should have been marked null
    for(int i=1; i < 10; i++) {
      assertEquals("failure at " + i,
          i == 3 || i == 5 || i == 7, vector.isNull[i]);
    }
    vector.unFlatten();

    // TEST - repeating non-NULL & no-selection
    vector.noNulls = true;
    vector.isRepeating = true;
    vector.offsets[0] = 0;
    vector.lengths[0] = 3;
    vector.childCount = 3;
    vector.flatten(false, null, 10);
    // make sure the vector was flattened
    assertFalse(vector.isRepeating);
    assertFalse(vector.noNulls);
    assertFalse(col1.isRepeating);
    assertFalse(col1.noNulls);
    for(int i=0; i < 10; ++i) {
      assertEquals("offset at " + i, 0, vector.offsets[i]);
      assertEquals("length at " + i, 3, vector.lengths[i]);
    }
    // every row repeats child[0] (value 3) three times
    for(int i=0; i < 10; ++i) {
      StringBuilder buf = new StringBuilder();
      vector.stringifyValue(buf, i);
      assertEquals("[3, 3, 3]", buf.toString());
    }
    vector.unFlatten();
    assertTrue(col1.isRepeating);
    assertTrue(col1.noNulls);
    assertTrue(vector.isRepeating);
    assertTrue(vector.noNulls);

    // TEST - repeating non-NULL & selection
    Arrays.fill(vector.offsets, 1, 10, -1);
    Arrays.fill(vector.lengths, 1, 10, -1);
    Arrays.fill(col1.vector, 1, 10, -1);
    vector.flatten(true, sel, 3);
    for(int i=1; i < 10; i++) {
      if (i == 3 || i == 5 || i == 7) {
        assertEquals("failure at " + i, 0, vector.offsets[i]);
        assertEquals("failure at " + i, 3, vector.lengths[i]);
      } else {
        // unselected rows keep the sentinel values
        assertEquals("failure at " + i, -1, vector.offsets[i]);
        assertEquals("failure at " + i, -1, vector.lengths[i]);
      }
    }
    for(int i=0; i < 3; ++i) {
      assertEquals("failure at " + i, 3, col1.vector[i]);
    }
    for(int i=3; i < 10; ++i) {
      assertEquals("failure at " + i, -1, col1.vector[i]);
    }
    vector.unFlatten();

    // TEST - reset
    vector.reset();
    assertFalse(col1.isRepeating);
    assertTrue(col1.noNulls);
    assertFalse(vector.isRepeating);
    assertTrue(vector.noNulls);
    assertEquals(0, vector.childCount);
  }

  /** Exercises setElement: null copy, value copy/append, and repeating input. */
  @Test
  public void testSet() throws Exception {
    LongColumnVector input1 = new LongColumnVector(10);
    ListColumnVector input = new ListColumnVector(10, input1);
    input.init();
    LongColumnVector output1 = new LongColumnVector(30);
    ListColumnVector output = new ListColumnVector(10, output1);
    output.init();
    input.noNulls = false;
    input.isNull[6] = true;
    input.childCount = 11;
    Arrays.fill(output1.vector, -1);
    for(int i=0; i < 10; ++i) {
      input1.vector[i] = 10 * i;
      input.offsets[i] = i;
      input.lengths[i] = 2;
      output.offsets[i] = i + 2;
      output.lengths[i] = 3;
    }
    output.childCount = 30;

    // copy a null
    output.setElement(3, 6, input);
    assertEquals(30, output.childCount);
    StringBuilder buf = new StringBuilder();
    output.stringifyValue(buf, 3);
    assertEquals("null", buf.toString());

    // copy a value; the copied elements are appended at childCount
    output.setElement(3, 5, input);
    assertEquals(30, output.offsets[3]);
    assertEquals(2, output.lengths[3]);
    assertEquals(32, output.childCount);
    buf = new StringBuilder();
    output.stringifyValue(buf, 3);
    assertEquals("[50, 60]", buf.toString());

    // overwrite a value; this forces the child vector to grow
    output.setElement(3, 4, input);
    assertEquals(34, output.childCount);
    assertEquals(34, output1.vector.length);
    assertEquals(50, output1.vector[30]);
    assertEquals(60, output1.vector[31]);
    buf = new StringBuilder();
    output.stringifyValue(buf, 3);
    assertEquals("[40, 50]", buf.toString());

    input.reset();
    assertEquals(false, input1.isRepeating);
    assertEquals(true, input.noNulls);
    output.reset();
    assertEquals(0, output.childCount);

    // repeating input always copies row 0's range
    input.isRepeating = true;
    input.offsets[0] = 0;
    input.lengths[0] = 10;
    output.setElement(2, 7, input);
    assertEquals(10, output.childCount);
    buf = new StringBuilder();
    output.stringifyValue(buf, 2);
    assertEquals("[0, 10, 20, 30, 40, 50, 60, 70, 80, 90]", buf.toString());
  }
}
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector;

import org.junit.Test;

import java.util.Arrays;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

/**
 * Test for MapColumnVector: covers flatten/unFlatten across the
 * repeating/null/selection combinations, and setElement copying between
 * two map vectors (keys + values children).
 */
public class TestMapColumnVector {

  /** Exercises flatten/unFlatten for all repeating/null/selection cases. */
  @Test
  public void testFlatten() throws Exception {
    LongColumnVector col1 = new LongColumnVector(10);
    DoubleColumnVector col2 = new DoubleColumnVector(10);
    MapColumnVector vector = new MapColumnVector(10, col1, col2);
    vector.init();

    // TEST - repeating NULL & no selection
    col1.isRepeating = true;
    vector.isRepeating = true;
    vector.noNulls = false;
    vector.isNull[0] = true;
    vector.childCount = 0;
    for(int i=0; i < 10; ++i) {
      col1.vector[i] = i + 3;
      col2.vector[i] = i * 10;
      vector.offsets[i] = i;
      vector.lengths[i] = 10 + i;
    }
    vector.flatten(false, null, 10);
    // make sure the vector was flattened
    assertFalse(vector.isRepeating);
    assertFalse(vector.noNulls);
    // child isn't flattened, because parent is repeating null
    assertTrue(col1.isRepeating);
    assertTrue(col1.noNulls);
    for(int i=0; i < 10; ++i) {
      assertTrue("isNull at " + i, vector.isNull[i]);
    }
    for(int i=0; i < 10; ++i) {
      StringBuilder buf = new StringBuilder();
      vector.stringifyValue(buf, i);
      assertEquals("null", buf.toString());
    }
    vector.unFlatten();
    assertTrue(col1.isRepeating);
    assertTrue(vector.isRepeating);

    // TEST - repeating NULL & selection
    Arrays.fill(vector.isNull, 1, 10, false);
    int[] sel = new int[]{3, 5, 7};
    vector.flatten(true, sel, 3);
    // only the selected rows should have been marked null
    for(int i=1; i < 10; i++) {
      assertEquals("failure at " + i,
          i == 3 || i == 5 || i == 7, vector.isNull[i]);
    }
    vector.unFlatten();

    // TEST - repeating non-NULL & no-selection
    vector.noNulls = true;
    vector.isRepeating = true;
    vector.offsets[0] = 0;
    vector.lengths[0] = 3;
    vector.childCount = 3;
    vector.flatten(false, null, 10);
    // make sure the vector was flattened
    assertFalse(vector.isRepeating);
    assertFalse(vector.noNulls);
    assertFalse(col1.isRepeating);
    assertFalse(col1.noNulls);
    assertFalse(col2.isRepeating);
    assertFalse(col2.noNulls);
    for(int i=0; i < 10; ++i) {
      assertEquals("offset at " + i, 0, vector.offsets[i]);
      assertEquals("length at " + i, 3, vector.lengths[i]);
    }
    // keys repeat col1[0] (3); values come from the non-repeating col2
    for(int i=0; i < 10; ++i) {
      StringBuilder buf = new StringBuilder();
      vector.stringifyValue(buf, i);
      assertEquals("[{\"key\": 3, \"value\": 0.0}," +
          " {\"key\": 3, \"value\": 10.0}," +
          " {\"key\": 3, \"value\": 20.0}]", buf.toString());
    }
    vector.unFlatten();
    assertTrue(col1.isRepeating);
    assertTrue(col1.noNulls);
    assertTrue(vector.isRepeating);
    // col2 was never repeating, so unFlatten leaves it that way
    assertFalse(col2.isRepeating);
    assertTrue(col2.noNulls);
    assertTrue(vector.noNulls);

    // TEST - repeating non-NULL & selection
    Arrays.fill(vector.offsets, 1, 10, -1);
    Arrays.fill(vector.lengths, 1, 10, -1);
    Arrays.fill(col1.vector, 1, 10, -1);
    vector.flatten(true, sel, 3);
    for(int i=1; i < 10; i++) {
      if (i == 3 || i == 5 || i == 7) {
        assertEquals("failure at " + i, 0, vector.offsets[i]);
        assertEquals("failure at " + i, 3, vector.lengths[i]);
      } else {
        // unselected rows keep the sentinel values
        assertEquals("failure at " + i, -1, vector.offsets[i]);
        assertEquals("failure at " + i, -1, vector.lengths[i]);
      }
    }
    for(int i=0; i < 3; ++i) {
      assertEquals("failure at " + i, 3, col1.vector[i]);
    }
    for(int i=3; i < 10; ++i) {
      assertEquals("failure at " + i, -1, col1.vector[i]);
    }
    vector.unFlatten();

    // TEST - reset
    vector.reset();
    assertFalse(col1.isRepeating);
    assertTrue(col1.noNulls);
    assertFalse(col2.isRepeating);
    assertTrue(col2.noNulls);
    assertFalse(vector.isRepeating);
    assertTrue(vector.noNulls);
    assertEquals(0, vector.childCount);
  }

  /** Exercises setElement: null copy, value copy/append, and repeating input. */
  @Test
  public void testSet() throws Exception {
    LongColumnVector input1 = new LongColumnVector(10);
    DoubleColumnVector input2 = new DoubleColumnVector(10);
    MapColumnVector input = new MapColumnVector(10, input1, input2);
    input.init();
    LongColumnVector output1 = new LongColumnVector(30);
    DoubleColumnVector output2 = new DoubleColumnVector(30);
    MapColumnVector output = new MapColumnVector(10, output1, output2);
    output.init();
    input.noNulls = false;
    input.isNull[6] = true;
    input.childCount = 11;
    Arrays.fill(output1.vector, -1);
    for(int i=0; i < 10; ++i) {
      input1.vector[i] = 10 * i;
      input2.vector[i] = 100 * i;
      input.offsets[i] = i;
      input.lengths[i] = 2;
      output.offsets[i] = i + 2;
      output.lengths[i] = 3;
    }
    output.childCount = 30;

    // copy a null
    output.setElement(3, 6, input);
    assertEquals(30, output.childCount);
    StringBuilder buf = new StringBuilder();
    output.stringifyValue(buf, 3);
    assertEquals("null", buf.toString());

    // copy a value; the copied entries are appended at childCount
    output.setElement(3, 5, input);
    assertEquals(30, output.offsets[3]);
    assertEquals(2, output.lengths[3]);
    assertEquals(32, output.childCount);
    buf = new StringBuilder();
    output.stringifyValue(buf, 3);
    assertEquals("[{\"key\": 50, \"value\": 500.0}," +
        " {\"key\": 60, \"value\": 600.0}]", buf.toString());

    // overwrite a value; this forces the child vectors to grow
    output.setElement(3, 4, input);
    assertEquals(34, output.childCount);
    assertEquals(34, output1.vector.length);
    assertEquals(50, output1.vector[30]);
    assertEquals(60, output1.vector[31]);
    buf = new StringBuilder();
    output.stringifyValue(buf, 3);
    assertEquals("[{\"key\": 40, \"value\": 400.0}," +
        " {\"key\": 50, \"value\": 500.0}]", buf.toString());

    input.reset();
    assertEquals(false, input1.isRepeating);
    assertEquals(true, input.noNulls);
    output.reset();
    assertEquals(0, output.childCount);

    // repeating input always copies row 0's range
    input.isRepeating = true;
    input.offsets[0] = 0;
    input.lengths[0] = 10;
    output.setElement(2, 7, input);
    assertEquals(10, output.childCount);
    buf = new StringBuilder();
    output.stringifyValue(buf, 2);
    assertEquals("[{\"key\": 0, \"value\": 0.0}," +
        " {\"key\": 10, \"value\": 100.0}," +
        " {\"key\": 20, \"value\": 200.0}," +
        " {\"key\": 30, \"value\": 300.0}," +
        " {\"key\": 40, \"value\": 400.0}," +
        " {\"key\": 50, \"value\": 500.0}," +
        " {\"key\": 60, \"value\": 600.0}," +
        " {\"key\": 70, \"value\": 700.0}," +
        " {\"key\": 80, \"value\": 800.0}," +
        " {\"key\": 90, \"value\": 900.0}]", buf.toString());
  }
}
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector;

import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

/**
 * Test for StructColumnVector: flatten/unFlatten propagation to the field
 * vectors and per-field setElement copying.
 */
public class TestStructColumnVector {

  /** Flattening a struct must flatten each field vector. */
  @Test
  public void testFlatten() throws Exception {
    LongColumnVector col1 = new LongColumnVector(10);
    LongColumnVector col2 = new LongColumnVector(10);
    StructColumnVector vector = new StructColumnVector(10, col1, col2);
    vector.init();
    col1.isRepeating = true;
    for(int i=0; i < 10; ++i) {
      col1.vector[i] = i;
      col2.vector[i] = 2 * i;
    }
    vector.flatten(false, null, 10);
    assertFalse(col1.isRepeating);
    // col1 was repeating, so every row now holds col1[0] == 0
    for(int i=0; i < 10; ++i) {
      assertEquals("col1 at " + i, 0, col1.vector[i]);
      assertEquals("col2 at " + i, 2 * i, col2.vector[i]);
    }
    vector.unFlatten();
    assertTrue(col1.isRepeating);
    for(int i=0; i < 10; ++i) {
      StringBuilder buf = new StringBuilder();
      vector.stringifyValue(buf, i);
      assertEquals("[0, " + (2 * i) + "]", buf.toString());
    }
    vector.reset();
    assertFalse(col1.isRepeating);
  }

  /** setElement must honor struct-level nulls, field nulls and repeating. */
  @Test
  public void testSet() throws Exception {
    LongColumnVector input1 = new LongColumnVector(10);
    LongColumnVector input2 = new LongColumnVector(10);
    StructColumnVector input = new StructColumnVector(10, input1, input2);
    input.init();
    LongColumnVector output1 = new LongColumnVector(10);
    LongColumnVector output2 = new LongColumnVector(10);
    StructColumnVector output = new StructColumnVector(10, output1, output2);
    output.init();
    input1.isRepeating = true;
    input2.noNulls = false;
    input2.isNull[5] = true;
    input.noNulls = false;
    input.isNull[6] = true;
    for(int i=0; i < 10; ++i) {
      input1.vector[i] = i + 1;
      input2.vector[i] = i + 2;
    }
    // row 6 is null at the struct level
    output.setElement(3, 6, input);
    StringBuilder buf = new StringBuilder();
    output.stringifyValue(buf, 3);
    assertEquals("null", buf.toString());
    // row 5: struct non-null, but field 2 is null; field 1 repeats 1
    output.setElement(3, 5, input);
    buf = new StringBuilder();
    output.stringifyValue(buf, 3);
    assertEquals("[1, null]", buf.toString());
    // row 4: fully populated
    output.setElement(3, 4, input);
    buf = new StringBuilder();
    output.stringifyValue(buf, 3);
    assertEquals("[1, 6]", buf.toString());
    input.reset();
    assertEquals(false, input1.isRepeating);
    assertEquals(true, input.noNulls);
  }
}
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector;

import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

/**
 * Test for UnionColumnVector: tag-directed stringification, flatten
 * propagation to children, and tag-aware setElement copying.
 */
public class TestUnionColumnVector {

  /** Flattening a union must flatten every child; tags stay untouched. */
  @Test
  public void testFlatten() throws Exception {
    LongColumnVector col1 = new LongColumnVector(10);
    LongColumnVector col2 = new LongColumnVector(10);
    UnionColumnVector vector = new UnionColumnVector(10, col1, col2);
    vector.init();
    col1.isRepeating = true;
    for(int i=0; i < 10; ++i) {
      vector.tags[i] = i % 2;
      col1.vector[i] = i;
      col2.vector[i] = 2 * i;
    }
    vector.flatten(false, null, 10);
    assertFalse(col1.isRepeating);
    for(int i=0; i < 10; ++i) {
      assertEquals(i % 2, vector.tags[i]);
      // col1 was repeating, so every row now holds col1[0] == 0
      assertEquals("col1 at " + i, 0, col1.vector[i]);
      assertEquals("col2 at " + i, 2 * i, col2.vector[i]);
    }
    vector.unFlatten();
    assertTrue(col1.isRepeating);
    for(int i=0; i < 10; ++i) {
      StringBuilder buf = new StringBuilder();
      vector.stringifyValue(buf, i);
      // even rows read the repeating col1 (0), odd rows read col2
      assertEquals("{\"tag\": " + (i % 2) + ", \"value\": " +
          (i % 2 == 0 ? 0 : 2 * i) + "}", buf.toString());
    }
    vector.reset();
    assertFalse(col1.isRepeating);
  }

  /** setElement must copy the tag and only the tagged child's value. */
  @Test
  public void testSet() throws Exception {
    LongColumnVector input1 = new LongColumnVector(10);
    LongColumnVector input2 = new LongColumnVector(10);
    UnionColumnVector input = new UnionColumnVector(10, input1, input2);
    input.init();
    LongColumnVector output1 = new LongColumnVector(10);
    LongColumnVector output2 = new LongColumnVector(10);
    UnionColumnVector output = new UnionColumnVector(10, output1, output2);
    output.init();
    input1.isRepeating = true;
    for(int i=0; i < 10; ++i) {
      input.tags[i] = i % 2;
      input1.vector[i] = i + 1;
      input2.vector[i] = i + 2;
    }
    // row 4 has tag 0; input1 repeats, so the value is input1[0] == 1
    output.setElement(3, 4, input);
    StringBuilder buf = new StringBuilder();
    output.stringifyValue(buf, 3);
    assertEquals("{\"tag\": 0, \"value\": 1}", buf.toString());
    input.noNulls = false;
    input.isNull[5] = true;
    output.setElement(3, 5, input);
    buf = new StringBuilder();
    output.stringifyValue(buf, 3);
    assertEquals("null", buf.toString());
    input.reset();
    assertEquals(false, input1.isRepeating);
    assertEquals(true, input.noNulls);
  }
}