diff --git a/service/src/java/org/apache/hive/service/cli/Column.java b/service/src/java/org/apache/hive/service/cli/Column.java index adb269a..2b7db80 100644 --- a/service/src/java/org/apache/hive/service/cli/Column.java +++ b/service/src/java/org/apache/hive/service/cli/Column.java @@ -25,6 +25,7 @@ import java.util.BitSet; import java.util.List; +import com.google.common.annotations.VisibleForTesting; import com.google.common.primitives.Booleans; import com.google.common.primitives.Bytes; import com.google.common.primitives.Doubles; @@ -80,7 +81,7 @@ public Column(Type type, BitSet nulls, Object values) { } else if (type == Type.BIGINT_TYPE) { longVars = (long[]) values; size = longVars.length; - } else if (type == Type.DOUBLE_TYPE) { + } else if (type == Type.DOUBLE_TYPE || type == Type.FLOAT_TYPE) { doubleVars = (double[]) values; size = doubleVars.length; } else if (type == Type.BINARY_TYPE) { @@ -173,67 +174,77 @@ public Column(TColumn colValues) { } } - public Column extractSubset(int start, int end) { - BitSet subNulls = nulls.get(start, end); + /** + * Get a subset of this ColumnBuffer, starting from the 1st value. + * + * @param end index after the last value to include + */ + public Column extractSubset(int end) { + BitSet subNulls = nulls.get(0, end); if (type == Type.BOOLEAN_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(boolVars, start, end)); + Column subset = new Column(type, subNulls, Arrays.copyOfRange(boolVars, 0, end)); boolVars = Arrays.copyOfRange(boolVars, end, size); - nulls = nulls.get(start, size); + nulls = nulls.get(end, size); size = boolVars.length; return subset; } if (type == Type.TINYINT_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(byteVars, start, end)); + Column subset = new Column(type, subNulls, Arrays.copyOfRange(byteVars, 0, end)); byteVars = Arrays.copyOfRange(byteVars, end, size); - nulls = nulls.get(start, size); + nulls = nulls.get(end, size); size = byteVars.length; return subset; } if (type == Type.SMALLINT_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(shortVars, start, end)); + Column subset = new Column(type, subNulls, Arrays.copyOfRange(shortVars, 0, end)); shortVars = Arrays.copyOfRange(shortVars, end, size); - nulls = nulls.get(start, size); + nulls = nulls.get(end, size); size = shortVars.length; return subset; } if (type == Type.INT_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(intVars, start, end)); + Column subset = new Column(type, subNulls, Arrays.copyOfRange(intVars, 0, end)); intVars = Arrays.copyOfRange(intVars, end, size); - nulls = nulls.get(start, size); + nulls = nulls.get(end, size); size = intVars.length; return subset; } if (type == Type.BIGINT_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(longVars, start, end)); + Column subset = new Column(type, subNulls, Arrays.copyOfRange(longVars, 0, end)); longVars = Arrays.copyOfRange(longVars, end, size); - nulls = nulls.get(start, size); + nulls = nulls.get(end, size); size = longVars.length; return subset; } - if (type == Type.DOUBLE_TYPE) { - Column subset = new Column(type, subNulls, Arrays.copyOfRange(doubleVars, start, end)); + if (type == Type.DOUBLE_TYPE || type == Type.FLOAT_TYPE) { + Column subset = new Column(type, subNulls, Arrays.copyOfRange(doubleVars, 0, end)); doubleVars = Arrays.copyOfRange(doubleVars, end, size); - nulls = nulls.get(start, size); + nulls = nulls.get(end, size); size = doubleVars.length; return subset; } if (type == Type.BINARY_TYPE) { - Column subset = new Column(type, subNulls, binaryVars.subList(start, end)); + Column subset = new Column(type, subNulls, binaryVars.subList(0, end)); binaryVars = binaryVars.subList(end, binaryVars.size()); - nulls = nulls.get(start, size); + nulls = nulls.get(end, size); size = binaryVars.size(); return subset; } if (type == Type.STRING_TYPE) { - Column subset = new Column(type, subNulls, stringVars.subList(start, end)); + Column subset = new Column(type, subNulls, stringVars.subList(0, end)); stringVars = stringVars.subList(end, stringVars.size()); - nulls = nulls.get(start, size); + nulls = nulls.get(end, size); size = stringVars.size(); return subset; } throw new IllegalStateException("invalid union object"); } + @VisibleForTesting + BitSet getNulls() { + return nulls; + } + private static final byte[] MASKS = new byte[] { 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, (byte)0x80 }; diff --git a/service/src/java/org/apache/hive/service/cli/ColumnBasedSet.java b/service/src/java/org/apache/hive/service/cli/ColumnBasedSet.java index 47a582e..46c8adb 100644 --- a/service/src/java/org/apache/hive/service/cli/ColumnBasedSet.java +++ b/service/src/java/org/apache/hive/service/cli/ColumnBasedSet.java @@ -87,7 +87,7 @@ public ColumnBasedSet extractSubset(int maxRows) { List subset = new ArrayList(); for (int i = 0; i < columns.size(); i++) { - subset.add(columns.get(i).extractSubset(0, numRows)); + subset.add(columns.get(i).extractSubset(numRows)); } ColumnBasedSet result = new ColumnBasedSet(types, subset, startOffset); startOffset += numRows; diff --git a/service/src/test/org/apache/hive/service/cli/TestColumn.java b/service/src/test/org/apache/hive/service/cli/TestColumn.java index 87bf848..4c97016 100644 --- a/service/src/test/org/apache/hive/service/cli/TestColumn.java +++ b/service/src/test/org/apache/hive/service/cli/TestColumn.java @@ -17,19 +17,154 @@ */ package org.apache.hive.service.cli; +import org.junit.Assert; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import java.nio.ByteBuffer; import java.util.Arrays; +import java.util.BitSet; +import java.util.Collection; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Random; +import java.util.Set; +import java.util.concurrent.ThreadLocalRandom; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; +@RunWith(Parameterized.class) public class TestColumn { + + private static final int NUM_VARS = 100; + private static final int NUM_NULLS = 30; + private static final Set nullIndices = new HashSet<>(); + + private final Type type; + private final Object vars; + + @Parameterized.Parameters + public static Collection types() { + return Arrays.asList(new Object[][]{ + {Type.BOOLEAN_TYPE}, + {Type.TINYINT_TYPE}, + {Type.SMALLINT_TYPE}, + {Type.INT_TYPE}, + {Type.BIGINT_TYPE}, + {Type.DOUBLE_TYPE}, + {Type.FLOAT_TYPE}, + {Type.BINARY_TYPE}, + {Type.STRING_TYPE} + } + ); + } + + public TestColumn(Type type) { + this.type = type; + switch (type) { + case BOOLEAN_TYPE: + vars = new boolean[NUM_VARS]; + break; + case TINYINT_TYPE: + vars = new byte[NUM_VARS]; + break; + case SMALLINT_TYPE: + vars = new short[NUM_VARS]; + break; + case INT_TYPE: + vars = new int[NUM_VARS]; + break; + case BIGINT_TYPE: + vars = new long[NUM_VARS]; + break; + case DOUBLE_TYPE: + case FLOAT_TYPE: + vars = new double[NUM_VARS]; + break; + case BINARY_TYPE: + vars = Arrays.asList(new ByteBuffer[NUM_VARS]); + break; + case STRING_TYPE: + vars = Arrays.asList(new String[NUM_VARS]); + break; + default: + throw new IllegalArgumentException("Invalid type " + type); + } + } + + private static void prepareNullIndices() { + nullIndices.clear(); + Random random = ThreadLocalRandom.current(); + while (nullIndices.size() != NUM_NULLS) { + nullIndices.add(random.nextInt(NUM_VARS)); + } + } + + /** + * Test if the nulls BitSet is maintained properly when we extract subset from ColumnBuffer. + * E.g. suppose we have a ColumnBuffer with nulls [0, 0, 1, 0]. When we split it evenly into + * two subsets, the subsets should have nulls [0, 0] and [1, 0] respectively. + */ + @Test + public void testNullsInSubset() { + prepareNullIndices(); + BitSet nulls = new BitSet(NUM_VARS); + for (int index : nullIndices) { + nulls.set(index); + } + + Column columnBuffer = new Column(type, nulls, vars); + Random random = ThreadLocalRandom.current(); + + int remaining = NUM_VARS; + while (remaining > 0) { + int toExtract = random.nextInt(remaining) + 1; + Column subset = columnBuffer.extractSubset(toExtract); + verifyNulls(subset, NUM_VARS - remaining); + remaining -= toExtract; + } + } + + private static void verifyNulls(Column buffer, int shift) { + BitSet nulls = buffer.getNulls(); + for (int i = 0; i < buffer.size(); i++) { + Assert.assertEquals("BitSet in parent and subset not the same.", + nullIndices.contains(i + shift), nulls.get(i)); + } + } + @Test - public void testAllIntegerTypeValues() { + public void testAddValues() { + switch (type) { + case BOOLEAN_TYPE: + testBooleanValues(); + break; + case TINYINT_TYPE: + case SMALLINT_TYPE: + case INT_TYPE: + case BIGINT_TYPE: + testAllIntegerTypeValues(); + break; + case DOUBLE_TYPE: + case FLOAT_TYPE: + testFloatAndDoubleValues(); + break; + case BINARY_TYPE: + testBinaryValues(); + break; + case STRING_TYPE: + testStringValues(); + break; + default: + throw new IllegalArgumentException("Invalid type " + type); + } + } + + private void testAllIntegerTypeValues() { Map> integerTypesAndValues = new LinkedHashMap>(); // Add TINYINT values @@ -71,8 +206,8 @@ public void testAllIntegerTypeValues() { } } - @Test - public void testFloatAndDoubleValues() { + + private void testFloatAndDoubleValues() { Column floatColumn = new Column(Type.FLOAT_TYPE); floatColumn.addValue(Type.FLOAT_TYPE, 1.1f); floatColumn.addValue(Type.FLOAT_TYPE, 2.033f); @@ -93,8 +228,8 @@ public void testFloatAndDoubleValues() { assertEquals(2.033, doubleColumn.get(1)); } - @Test - public void testBooleanValues() { + + private void testBooleanValues() { Column boolColumn = new Column(Type.BOOLEAN_TYPE); boolColumn.addValue(Type.BOOLEAN_TYPE, true); boolColumn.addValue(Type.BOOLEAN_TYPE, false); @@ -105,8 +240,8 @@ public void testBooleanValues() { assertEquals(false, boolColumn.get(1)); } - @Test - public void testStringValues() { + + private void testStringValues() { Column stringColumn = new Column(Type.STRING_TYPE); stringColumn.addValue(Type.STRING_TYPE, "12abc456"); stringColumn.addValue(Type.STRING_TYPE, "~special$&string"); @@ -117,8 +252,8 @@ public void testStringValues() { assertEquals("~special$&string", stringColumn.get(1)); } - @Test - public void testBinaryValues() { + + private void testBinaryValues() { Column binaryColumn = new Column(Type.BINARY_TYPE); binaryColumn.addValue(Type.BINARY_TYPE, new byte[]{-1, 0, 3, 4});