diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java index 0ff98bd..a4bc188 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java @@ -18,16 +18,23 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; +import com.google.common.annotations.VisibleForTesting; + /* - * An single byte array value hash map optimized for vector map join. + * An bytes key hash map optimized for vector map join. + * + * This is the abstract base for the multi-key and string bytes key hash map implementations. */ public abstract class VectorMapJoinFastBytesHashMap extends VectorMapJoinFastBytesHashTable @@ -37,6 +44,8 @@ private VectorMapJoinFastValueStore valueStore; + protected BytesWritable testValueBytesWritable; + @Override public VectorMapJoinHashMapResult createHashMapResult() { return new VectorMapJoinFastValueStore.HashMapResult(); @@ -56,7 +65,6 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = valueStore.addFirst(valueBytes, 0, valueLength); // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); - keysAssigned++; } else { // Add another value. // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java index 5d8ed2d..aaf3497 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java @@ -18,16 +18,23 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; +import com.google.common.annotations.VisibleForTesting; + /* - * An single byte array value hash multi-set optimized for vector map join. 
+ * An bytes key hash multi-set optimized for vector map join. + * + * This is the abstract base for the multi-key and string bytes key hash multi-set implementations. */ public abstract class VectorMapJoinFastBytesHashMultiSet extends VectorMapJoinFastBytesHashTable @@ -51,7 +58,6 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = 1; // Count. // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); - keysAssigned++; } else { // Add another value. // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java index 990a2e5..841183e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java @@ -27,7 +27,9 @@ import org.apache.hive.common.util.HashCodeUtil; /* - * An single byte array value hash multi-set optimized for vector map join. + * An bytes key hash set optimized for vector map join. + * + * This is the abstract base for the multi-key and string bytes key hash set implementations. */ public abstract class VectorMapJoinFastBytesHashSet extends VectorMapJoinFastBytesHashTable @@ -50,7 +52,6 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = 1; // Existence - keysAssigned++; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index 6b536f0..d6e107b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -42,8 +42,7 @@ protected VectorMapJoinFastKeyStore keyStore; - private BytesWritable testKeyBytesWritable; - private BytesWritable testValueBytesWritable; + protected BytesWritable testKeyBytesWritable; @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { @@ -53,17 +52,6 @@ public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws add(keyBytes, 0, keyLength, currentValue); } - @VisibleForTesting - public void putRow(byte[] currentKey, byte[] currentValue) throws HiveException, IOException { - if (testKeyBytesWritable == null) { - testKeyBytesWritable = new BytesWritable(); - testValueBytesWritable = new BytesWritable(); - } - testKeyBytesWritable.set(currentKey, 0, currentKey.length); - testValueBytesWritable.set(currentValue, 0, currentValue.length); - putRow(testKeyBytesWritable, testValueBytesWritable); - } - protected abstract void assignSlot(int slot, byte[] keyBytes, int keyStart, int 
keyLength, long hashCode, boolean isNewKey, BytesWritable currentValue); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java index 1384fc9..cd51d0d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java @@ -18,17 +18,22 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; +import com.google.common.annotations.VisibleForTesting; + /* - * An single long value map optimized for vector map join. + * A single LONG key hash map optimized for vector map join. */ public class VectorMapJoinFastLongHashMap extends VectorMapJoinFastLongHashTable @@ -38,11 +43,26 @@ protected VectorMapJoinFastValueStore valueStore; + private BytesWritable testValueBytesWritable; + @Override public VectorMapJoinHashMapResult createHashMapResult() { return new VectorMapJoinFastValueStore.HashMapResult(); } + /* + * A Unit Test convenience method for putting key and value into the hash table using the + * actual types. + */ + @VisibleForTesting + public void testPutRow(long currentKey, byte[] currentValue) throws HiveException, IOException { + if (testValueBytesWritable == null) { + testValueBytesWritable = new BytesWritable(); + } + testValueBytesWritable.set(currentValue, 0, currentValue.length); + add(currentKey, testValueBytesWritable); + } + @Override public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java index 94bf706..032233a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java @@ -31,8 +31,10 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; +import com.google.common.annotations.VisibleForTesting; + /* - * An single long value multi-set optimized for vector map join. + * A single LONG key hash multi-set optimized for vector map join. */ public class VectorMapJoinFastLongHashMultiSet extends VectorMapJoinFastLongHashTable @@ -45,6 +47,15 @@ public VectorMapJoinHashMultiSetResult createHashMultiSetResult() { return new VectorMapJoinFastHashMultiSet.HashMultiSetResult(); } + /* + * A Unit Test convenience method for putting the key into the hash table using the + * actual type.
+ */ + @VisibleForTesting + public void testPutRow(long currentKey) throws HiveException, IOException { + add(currentKey, null); + } + @Override public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java index 2cbc548..21701d4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java @@ -18,18 +18,23 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashSet; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; +import com.google.common.annotations.VisibleForTesting; + /* - * An single long value multi-set optimized for vector map join. + * A single LONG key hash set optimized for vector map join. */ public class VectorMapJoinFastLongHashSet extends VectorMapJoinFastLongHashTable @@ -42,6 +47,15 @@ public VectorMapJoinHashSetResult createHashSetResult() { return new VectorMapJoinFastHashSet.HashSetResult(); } + /* + * A Unit Test convenience method for putting the key into the hash table using the + * actual type.
+ */ + @VisibleForTesting + public void testPutRow(long currentKey) throws HiveException, IOException { + add(currentKey, null); + } + @Override public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index f37f056..0a502e0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -58,8 +58,6 @@ private long min; private long max; - private BytesWritable testValueBytesWritable; - @Override public boolean useMinMax() { return useMinMax; @@ -90,17 +88,6 @@ public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws add(key, currentValue); } - - @VisibleForTesting - public void putRow(long currentKey, byte[] currentValue) throws HiveException, IOException { - if (testValueBytesWritable == null) { - testValueBytesWritable = new BytesWritable(); - } - testValueBytesWritable.set(currentValue, 0, currentValue.length); - add(currentKey, testValueBytesWritable); - } - - protected abstract void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue); public void add(long key, BytesWritable currentValue) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java index 9a9fb8d..cee3b3b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java @@ -18,17 +18,34 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.io.BytesWritable; + import com.google.common.annotations.VisibleForTesting; /* * An multi-key value hash map optimized for vector map join. + * + * The key is stored as the provided bytes (uninterpreted). */ public class VectorMapJoinFastMultiKeyHashMap extends VectorMapJoinFastBytesHashMap { + /* + * A Unit Test convenience method for putting key and value into the hash table using the + * actual types. 
+ */ @VisibleForTesting - public VectorMapJoinFastMultiKeyHashMap(int initialCapacity, float loadFactor, int wbSize) { - this(false, initialCapacity, loadFactor, wbSize); + public void testPutRow(byte[] currentKey, byte[] currentValue) throws HiveException, IOException { + if (testKeyBytesWritable == null) { + testKeyBytesWritable = new BytesWritable(); + testValueBytesWritable = new BytesWritable(); + } + testKeyBytesWritable.set(currentKey, 0, currentKey.length); + testValueBytesWritable.set(currentValue, 0, currentValue.length); + putRow(testKeyBytesWritable, testValueBytesWritable); } public VectorMapJoinFastMultiKeyHashMap( diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java index a8744a5..ff82ac4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java @@ -18,15 +18,38 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.io.BytesWritable; + +import com.google.common.annotations.VisibleForTesting; + /* - * An multi-key value hash multi-set optimized for vector map join. + * An multi-key hash multi-set optimized for vector map join. + * + * The key is stored as the provided bytes (uninterpreted). */ public class VectorMapJoinFastMultiKeyHashMultiSet extends VectorMapJoinFastBytesHashMultiSet { + /* + * A Unit Test convenience method for putting the key into the hash table using the + * actual type. + */ + @VisibleForTesting + public void testPutRow(byte[] currentKey) throws HiveException, IOException { + if (testKeyBytesWritable == null) { + testKeyBytesWritable = new BytesWritable(); + } + testKeyBytesWritable.set(currentKey, 0, currentKey.length); + putRow(testKeyBytesWritable, null); + } + public VectorMapJoinFastMultiKeyHashMultiSet( boolean isOuterJoin, int initialCapacity, float loadFactor, int writeBuffersSize) { super(initialCapacity, loadFactor, writeBuffersSize); } + } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java index a8048e5..de0666d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java @@ -18,15 +18,39 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; + +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.io.BytesWritable; + +import com.google.common.annotations.VisibleForTesting; + /* - * An multi-key value hash set optimized for vector map join. + * An multi-key hash set optimized for vector map join. + * + * The key is stored as the provided bytes (uninterpreted). */ public class VectorMapJoinFastMultiKeyHashSet extends VectorMapJoinFastBytesHashSet { + /* + * A Unit Test convenience method for putting the key into the hash table using the + * actual type. 
+ */ + @VisibleForTesting + public void testPutRow(byte[] currentKey) throws HiveException, IOException { + if (testKeyBytesWritable == null) { + testKeyBytesWritable = new BytesWritable(); + } + testKeyBytesWritable.set(currentKey, 0, currentKey.length); + putRow(testKeyBytesWritable, null); + } + public VectorMapJoinFastMultiKeyHashSet( boolean isOuterJoin, int initialCapacity, float loadFactor, int writeBuffersSize) { super(initialCapacity, loadFactor, writeBuffersSize); } + + } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java index 6f181b2..35af1d1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java @@ -24,7 +24,9 @@ import org.apache.hadoop.io.BytesWritable; /* - * An single byte array value hash map optimized for vector map join. + * An single STRING key hash map optimized for vector map join. + * + * The key will be deserialized and just the bytes will be stored. */ public class VectorMapJoinFastStringHashMap extends VectorMapJoinFastBytesHashMap { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java index 9653b71..36120b7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java @@ -24,7 +24,9 @@ import org.apache.hadoop.io.BytesWritable; /* - * An single byte array value hash map optimized for vector map join. + * An single STRING key hash multi-set optimized for vector map join. + * + * The key will be deserialized and just the bytes will be stored. */ public class VectorMapJoinFastStringHashMultiSet extends VectorMapJoinFastBytesHashMultiSet { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java index 6419a0b..2ed6ab3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java @@ -24,7 +24,9 @@ import org.apache.hadoop.io.BytesWritable; /* - * An single byte array value hash map optimized for vector map join. + * An single STRING key hash set optimized for vector map join. + * + * The key will be deserialized and just the bytes will be stored. 
*/ public class VectorMapJoinFastStringHashSet extends VectorMapJoinFastBytesHashSet { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java index 570a747..f96e32b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastValueStore.java @@ -23,7 +23,9 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; -import org.apache.hadoop.hive.serde2.WriteBuffers.Position;; +import org.apache.hadoop.hive.serde2.WriteBuffers.Position; + +import com.google.common.base.Preconditions; // Supports random access. @@ -142,7 +144,6 @@ public HashMapResult() { } public void set(VectorMapJoinFastValueStore valueStore, long valueRefWord) { - // LOG.debug("VectorMapJoinFastValueStore set valueRefWord " + Long.toHexString(valueRefWord)); this.valueStore = valueStore; this.valueRefWord = valueRefWord; @@ -217,6 +218,10 @@ public ByteSegmentRef internalRead() { if (readIndex == 0) { /* + * Positioned to first. + */ + + /* * Extract information from reference word from slot table. */ absoluteValueOffset = @@ -226,19 +231,32 @@ public ByteSegmentRef internalRead() { valueStore.writeBuffers.setReadPoint(absoluteValueOffset, readPos); if (isSingleRow) { + /* + * One element. + */ isNextEof = true; valueLength = (int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift); boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn); if (!isValueLengthSmall) { - // And, if current value is big we must read it. + + // {Big Value Len} {Big Value Bytes} valueLength = valueStore.writeBuffers.readVInt(readPos); + } else { + + // {Small Value Bytes} + // (use small length from valueWordRef) } } else { + /* + * First of Multiple elements. + */ isNextEof = false; - // 2nd and beyond records have a relative offset word at the beginning. + /* + * Read the relative offset word at the beginning 2nd and beyond records. + */ long relativeOffsetWord = valueStore.writeBuffers.readVLong(readPos); long relativeOffset = @@ -246,25 +264,31 @@ public ByteSegmentRef internalRead() { nextAbsoluteValueOffset = absoluteValueOffset - relativeOffset; + valueLength = + (int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift); + boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn); + + /* + * Optionally, read current value's big length. {Big Value Len} {Big Value Bytes} + * Since this is the first record, the valueRefWord directs us. + */ + if (!isValueLengthSmall) { + valueLength = valueStore.writeBuffers.readVInt(readPos); + } + isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0); isNextValueLengthSmall = ((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0); - } - valueLength = - (int) ((valueRefWord & SmallValueLength.bitMask) >> SmallValueLength.bitShift); - boolean isValueLengthSmall = (valueLength != SmallValueLength.allBitsOn); - if (!isValueLengthSmall) { - // And, if current value is big we must read it. - valueLength = valueStore.writeBuffers.readVInt(readPos); - } - - // 2nd and beyond have the next value's small length in the current record. 
- if (isNextValueLengthSmall) { - nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos); - } else { - nextSmallValueLength = -1; - } + /* + * Optionally, the next value's small length could be a 2nd integer... + */ + if (isNextValueLengthSmall) { + nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos); + } else { + nextSmallValueLength = -1; + } + } } else { if (isNextEof) { @@ -277,24 +301,37 @@ public ByteSegmentRef internalRead() { valueStore.writeBuffers.setReadPoint(absoluteValueOffset, readPos); if (isNextLast) { + /* + * No relativeOffsetWord in the last value. (This was the first value written.) + */ isNextEof = true; if (isNextValueLengthSmall) { + + // {Small Value Bytes} valueLength = nextSmallValueLength; } else { - valueLength = (int) valueStore.writeBuffers.readVLong(readPos); + + // {Big Value Len} {Big Value Bytes} valueLength = valueStore.writeBuffers.readVInt(readPos); } } else { + /* + * {Rel Offset Word} [Big Value Len] [Next Value Small Len] {Value Bytes} + * + * 2nd and beyond records have a relative offset word at the beginning. + */ isNextEof = false; - // 2nd and beyond records have a relative offset word at the beginning. long relativeOffsetWord = valueStore.writeBuffers.readVLong(readPos); - // Read current value's big length now, if necessary. + /* + * Optionally, read current value's big length. {Big Value Len} {Big Value Bytes} + */ if (isNextValueLengthSmall) { valueLength = nextSmallValueLength; } else { - valueLength = (int) valueStore.writeBuffers.readVLong(readPos); + valueLength = valueStore.writeBuffers.readVInt(readPos); } long relativeOffset = @@ -305,9 +342,13 @@ public ByteSegmentRef internalRead() { isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0); isNextValueLengthSmall = ((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0); + + /* + * Optionally, the next value's small length could be a 2nd integer in the value's + * information. + */ if (isNextValueLengthSmall) { - // TODO: Write readVInt - nextSmallValueLength = (int) valueStore.writeBuffers.readVLong(readPos); + nextSmallValueLength = valueStore.writeBuffers.readVInt(readPos); } else { nextSmallValueLength = -1; } @@ -396,6 +437,51 @@ public String toString() { private static final long flagOnMask = 1L << bitShift; } + private static String valueRefWordToString(long valueRef) { + StringBuilder sb = new StringBuilder(); + + sb.append(Long.toHexString(valueRef)); + sb.append(", "); + if ((valueRef & IsInvalidFlag.flagOnMask) != 0) { + sb.append("(Invalid optimized hash table reference), "); + } + /* + * Extract information.
+ */ + long absoluteValueOffset = + (valueRef & AbsoluteValueOffset.bitMask); + int smallValueLength = + (int) ((valueRef & SmallValueLength.bitMask) >> SmallValueLength.bitShift); + boolean isValueLengthSmall = (smallValueLength != SmallValueLength.allBitsOn); + int cappedCount = + (int) ((valueRef & CappedCount.bitMask) >> CappedCount.bitShift); + boolean isValueLast = + ((valueRef & IsLastFlag.flagOnMask) != 0); + + sb.append("absoluteValueOffset "); + sb.append(absoluteValueOffset); + sb.append(" ("); + sb.append(Long.toHexString(absoluteValueOffset)); + sb.append("), "); + + if (isValueLengthSmall) { + sb.append("smallValueLength "); + sb.append(smallValueLength); + sb.append(", "); + } else { + sb.append("isValueLengthSmall = false, "); + } + + sb.append("cappedCount "); + sb.append(cappedCount); + sb.append(", "); + + sb.append("isValueLast "); + sb.append(isValueLast); + + return sb.toString(); + } + /** * Relative Offset Word stored at the beginning of all but the last value that has a * relative offset and 2 flags. @@ -431,6 +517,33 @@ public String toString() { private static final long bitMask = allBitsOn << bitShift; } + private static String relativeOffsetWordToString(long relativeOffsetWord) { + StringBuilder sb = new StringBuilder(); + + sb.append(Long.toHexString(relativeOffsetWord)); + sb.append(", "); + + long nextRelativeOffset = + (relativeOffsetWord & NextRelativeValueOffset.bitMask) >> NextRelativeValueOffset.bitShift; + sb.append("nextRelativeOffset "); + sb.append(nextRelativeOffset); + sb.append(" ("); + sb.append(Long.toHexString(nextRelativeOffset)); + sb.append("), "); + + boolean isNextLast = ((relativeOffsetWord & IsNextValueLastFlag.flagOnMask) != 0); + sb.append("isNextLast "); + sb.append(isNextLast); + sb.append(", "); + + boolean isNextValueLengthSmall = + ((relativeOffsetWord & IsNextValueLengthSmallFlag.flagOnMask) != 0); + sb.append("isNextValueLengthSmall "); + sb.append(isNextValueLengthSmall); + + return sb.toString(); + } + public long addFirst(byte[] valueBytes, int valueStart, int valueLength) { // First value is written without: next relative offset, next value length, is next value last @@ -473,8 +586,6 @@ public long addFirst(byte[] valueBytes, int valueStart, int valueLength) { valueRefWord |= SmallValueLength.allBitsOnBitShifted; } - // LOG.debug("VectorMapJoinFastValueStore addFirst valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " valueRefWord " + Long.toHexString(valueRefWord)); - // The lower bits are the absolute value offset. valueRefWord |= newAbsoluteOffset; @@ -499,8 +610,6 @@ public long addMore(long oldValueRef, byte[] valueBytes, int valueStart, int val boolean isOldValueLast = ((oldValueRef & IsLastFlag.flagOnMask) != 0); - // LOG.debug("VectorMapJoinFastValueStore addMore isOldValueLast " + isOldValueLast + " oldSmallValueLength " + oldSmallValueLength + " oldAbsoluteValueOffset " + oldAbsoluteValueOffset + " oldValueRef " + Long.toHexString(oldValueRef)); - /* * Write information about the old value (which becomes our next) at the beginning * of our new value. @@ -523,12 +632,6 @@ public long addMore(long oldValueRef, byte[] valueBytes, int valueStart, int val writeBuffers.writeVLong(relativeOffsetWord); - // When the next value is small it was not recorded with the old (i.e. next) value and we - // have to remember it. - if (isOldValueLengthSmall) { - writeBuffers.writeVInt(oldSmallValueLength); - } - // Now, we have written all information about the next value, work on the *new* value. 
long newValueRef = ((long) newCappedCount) << CappedCount.bitShift; @@ -536,18 +639,28 @@ public long addMore(long oldValueRef, byte[] valueBytes, int valueStart, int val if (!isNewValueSmall) { // Use magic value to indicating we are writing the big value length. newValueRef |= ((long) SmallValueLength.allBitsOn << SmallValueLength.bitShift); + Preconditions.checkState( + (int) ((newValueRef & SmallValueLength.bitMask) >> SmallValueLength.bitShift) == + SmallValueLength.allBitsOn); writeBuffers.writeVInt(valueLength); + } else { // Caller must remember small value length. newValueRef |= ((long) valueLength) << SmallValueLength.bitShift; } + + // When the next value is small it was not recorded with the old (i.e. next) value and we + // have to remember it. + if (isOldValueLengthSmall) { + + writeBuffers.writeVInt(oldSmallValueLength); + } + writeBuffers.write(valueBytes, valueStart, valueLength); // The lower bits are the absolute value offset. newValueRef |= newAbsoluteOffset; - // LOG.debug("VectorMapJoinFastValueStore addMore valueLength " + valueLength + " newAbsoluteOffset " + newAbsoluteOffset + " newValueRef " + Long.toHexString(newValueRef)); - return newValueRef; } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java deleted file mode 100644 index 2d4baa0..0000000 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/RandomRowObjectSource.java +++ /dev/null @@ -1,388 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.hive.ql.exec.vector; - -import java.sql.Date; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; - -import junit.framework.TestCase; - -import org.apache.commons.lang.ArrayUtils; -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.hive.common.type.HiveChar; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; -import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; -import org.apache.hadoop.hive.common.type.HiveVarchar; -import org.apache.hadoop.hive.common.type.RandomTypeUtil; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; -import org.apache.hadoop.io.BooleanWritable; -import org.apache.hive.common.util.DateUtils; - -/** - * Generate object inspector and random row object[]. 
- */ -public class RandomRowObjectSource { - - private Random r; - - private int columnCount; - - private List typeNames; - - private PrimitiveCategory[] primitiveCategories; - - private PrimitiveTypeInfo[] primitiveTypeInfos; - - private List primitiveObjectInspectorList; - - private StructObjectInspector rowStructObjectInspector; - - public List typeNames() { - return typeNames; - } - - public PrimitiveCategory[] primitiveCategories() { - return primitiveCategories; - } - - public PrimitiveTypeInfo[] primitiveTypeInfos() { - return primitiveTypeInfos; - } - - public StructObjectInspector rowStructObjectInspector() { - return rowStructObjectInspector; - } - - public void init(Random r) { - this.r = r; - chooseSchema(); - } - - private static String[] possibleHiveTypeNames = { - "boolean", - "tinyint", - "smallint", - "int", - "bigint", - "date", - "float", - "double", - "string", - "char", - "varchar", - "binary", - "date", - "timestamp", - serdeConstants.INTERVAL_YEAR_MONTH_TYPE_NAME, - serdeConstants.INTERVAL_DAY_TIME_TYPE_NAME, - "decimal" - }; - - private void chooseSchema() { - columnCount = 1 + r.nextInt(20); - typeNames = new ArrayList(columnCount); - primitiveCategories = new PrimitiveCategory[columnCount]; - primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; - primitiveObjectInspectorList = new ArrayList(columnCount); - List columnNames = new ArrayList(columnCount); - for (int c = 0; c < columnCount; c++) { - columnNames.add(String.format("col%d", c)); - int typeNum = r.nextInt(possibleHiveTypeNames.length); - String typeName = possibleHiveTypeNames[typeNum]; - if (typeName.equals("char")) { - int maxLength = 1 + r.nextInt(100); - typeName = String.format("char(%d)", maxLength); - } else if (typeName.equals("varchar")) { - int maxLength = 1 + r.nextInt(100); - typeName = String.format("varchar(%d)", maxLength); - } else if (typeName.equals("decimal")) { - typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE); - } - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); - primitiveTypeInfos[c] = primitiveTypeInfo; - PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); - primitiveCategories[c] = primitiveCategory; - primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo)); - typeNames.add(typeName); - } - rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList); - } - - public Object[][] randomRows(int n) { - Object[][] result = new Object[n][]; - for (int i = 0; i < n; i++) { - result[i] = randomRow(); - } - return result; - } - - public Object[] randomRow() { - Object row[] = new Object[columnCount]; - for (int c = 0; c < columnCount; c++) { - Object object = randomObject(c); - if (object == null) { - throw new Error("Unexpected null for column " + c); - } - row[c] = getWritableObject(c, object); - if (row[c] == null) { - throw new Error("Unexpected null for writable for column " + c); - } - } - return row; - } - - public Object getWritableObject(int column, Object object) { - ObjectInspector objectInspector = primitiveObjectInspectorList.get(column); - PrimitiveCategory primitiveCategory = primitiveCategories[column]; - PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; - switch (primitiveCategory) { - case BOOLEAN: - return ((WritableBooleanObjectInspector) 
objectInspector).create((boolean) object); - case BYTE: - return ((WritableByteObjectInspector) objectInspector).create((byte) object); - case SHORT: - return ((WritableShortObjectInspector) objectInspector).create((short) object); - case INT: - return ((WritableIntObjectInspector) objectInspector).create((int) object); - case LONG: - return ((WritableLongObjectInspector) objectInspector).create((long) object); - case DATE: - return ((WritableDateObjectInspector) objectInspector).create((Date) object); - case FLOAT: - return ((WritableFloatObjectInspector) objectInspector).create((float) object); - case DOUBLE: - return ((WritableDoubleObjectInspector) objectInspector).create((double) object); - case STRING: - return ((WritableStringObjectInspector) objectInspector).create((String) object); - case CHAR: - { - WritableHiveCharObjectInspector writableCharObjectInspector = - new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo); - return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1)); - } - case VARCHAR: - { - WritableHiveVarcharObjectInspector writableVarcharObjectInspector = - new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo); - return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1)); - } - case BINARY: - return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY); - case TIMESTAMP: - return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0)); - case INTERVAL_YEAR_MONTH: - return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0)); - case INTERVAL_DAY_TIME: - return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0)); - case DECIMAL: - { - WritableHiveDecimalObjectInspector writableDecimalObjectInspector = - new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo); - return writableDecimalObjectInspector.create(HiveDecimal.ZERO); - } - default: - throw new Error("Unknown primitive category " + primitiveCategory); - } - } - - public Object randomObject(int column) { - PrimitiveCategory primitiveCategory = primitiveCategories[column]; - PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; - switch (primitiveCategory) { - case BOOLEAN: - return Boolean.valueOf(r.nextInt(1) == 1); - case BYTE: - return Byte.valueOf((byte) r.nextInt()); - case SHORT: - return Short.valueOf((short) r.nextInt()); - case INT: - return Integer.valueOf(r.nextInt()); - case LONG: - return Long.valueOf(r.nextLong()); - case DATE: - return getRandDate(r); - case FLOAT: - return Float.valueOf(r.nextFloat() * 10 - 5); - case DOUBLE: - return Double.valueOf(r.nextDouble() * 10 - 5); - case STRING: - return getRandString(r); - case CHAR: - return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo); - case VARCHAR: - return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo); - case BINARY: - return getRandBinary(r, 1 + r.nextInt(100)); - case TIMESTAMP: - return RandomTypeUtil.getRandTimestamp(r); - case INTERVAL_YEAR_MONTH: - return getRandIntervalYearMonth(r); - case INTERVAL_DAY_TIME: - return getRandIntervalDayTime(r); - case DECIMAL: - return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo); - default: - throw new Error("Unknown primitive category " + primitiveCategory); - } - } - - public static String getRandString(Random r) { - return getRandString(r, null, r.nextInt(10)); - } - - public static String 
getRandString(Random r, String characters, int length) { - if (characters == null) { - characters = "ABCDEFGHIJKLMabcdefghijklm"; - } - StringBuilder sb = new StringBuilder(); - sb.append(""); - for (int i = 0; i < length; i++) { - if (characters == null) { - sb.append((char) (r.nextInt(128))); - } else { - sb.append(characters.charAt(r.nextInt(characters.length()))); - } - } - return sb.toString(); - } - - public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) { - int maxLength = 1 + r.nextInt(charTypeInfo.getLength()); - String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); - HiveChar hiveChar = new HiveChar(randomString, maxLength); - return hiveChar; - } - - public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) { - int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength()); - String randomString = getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); - HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength); - return hiveVarchar; - } - - public static byte[] getRandBinary(Random r, int len){ - byte[] bytes = new byte[len]; - for (int j = 0; j < len; j++){ - bytes[j] = Byte.valueOf((byte) r.nextInt()); - } - return bytes; - } - - private static final String DECIMAL_CHARS = "0123456789"; - - public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) { - while (true) { - StringBuilder sb = new StringBuilder(); - int precision = 1 + r.nextInt(18); - int scale = 0 + r.nextInt(precision + 1); - - int integerDigits = precision - scale; - - if (r.nextBoolean()) { - sb.append("-"); - } - - if (integerDigits == 0) { - sb.append("0"); - } else { - sb.append(getRandString(r, DECIMAL_CHARS, integerDigits)); - } - if (scale != 0) { - sb.append("."); - sb.append(getRandString(r, DECIMAL_CHARS, scale)); - } - - HiveDecimal bd = HiveDecimal.create(sb.toString()); - if (bd.scale() > bd.precision()) { - // Sometimes weird decimals are produced? - continue; - } - - return bd; - } - } - - public static Date getRandDate(Random r) { - String dateStr = String.format("%d-%02d-%02d", - Integer.valueOf(1800 + r.nextInt(500)), // year - Integer.valueOf(1 + r.nextInt(12)), // month - Integer.valueOf(1 + r.nextInt(28))); // day - Date dateVal = Date.valueOf(dateStr); - return dateVal; - } - - public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) { - String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-"; - String intervalYearMonthStr = String.format("%s%d-%d", - yearMonthSignStr, - Integer.valueOf(1800 + r.nextInt(500)), // year - Integer.valueOf(0 + r.nextInt(12))); // month - HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr); - TestCase.assertTrue(intervalYearMonthVal != null); - return intervalYearMonthVal; - } - - public static HiveIntervalDayTime getRandIntervalDayTime(Random r) { - String optionalNanos = ""; - if (r.nextInt(2) == 1) { - optionalNanos = String.format(".%09d", - Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC))); - } - String yearMonthSignStr = r.nextInt(2) == 0 ? 
"" : "-"; - String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s", - yearMonthSignStr, - Integer.valueOf(1 + r.nextInt(28)), // day - Integer.valueOf(0 + r.nextInt(24)), // hour - Integer.valueOf(0 + r.nextInt(60)), // minute - Integer.valueOf(0 + r.nextInt(60)), // second - optionalNanos); - HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr); - TestCase.assertTrue(intervalDayTimeVal != null); - return intervalDayTimeVal; - } -} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java index 959a2af..c55d951 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorRowObject.java @@ -19,13 +19,10 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; import java.util.Random; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource; import junit.framework.TestCase; @@ -50,7 +47,7 @@ void examineBatch(VectorizedRowBatch batch, VectorExtractRow vectorExtractRow, } } - void testVectorRowObject(int caseNum, Random r) throws HiveException { + void testVectorRowObject(int caseNum, boolean sort, Random r) throws HiveException { String[] emptyScratchTypeNames = new String[0]; @@ -74,6 +71,9 @@ void testVectorRowObject(int caseNum, Random r) throws HiveException { vectorExtractRow.init(source.typeNames()); Object[][] randomRows = source.randomRows(100000); + if (sort) { + source.sort(randomRows); + } int firstRandomRowIndex = 0; for (int i = 0; i < randomRows.length; i++) { Object[] row = randomRows[i]; @@ -93,14 +93,22 @@ void testVectorRowObject(int caseNum, Random r) throws HiveException { public void testVectorRowObject() throws Throwable { - try { - Random r = new Random(5678); - for (int c = 0; c < 10; c++) { - testVectorRowObject(c, r); + try { + Random r = new Random(5678); + + int caseNum = 0; + for (int i = 0; i < 10; i++) { + testVectorRowObject(caseNum, false, r); + caseNum++; + } + + // Try one sorted. 
+ testVectorRowObject(caseNum, true, r); + caseNum++; + + } catch (Throwable e) { + e.printStackTrace(); + throw e; } - } catch (Throwable e) { - e.printStackTrace(); - throw e; - } } } \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java index e37d2bf..da69ee3 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java @@ -22,8 +22,6 @@ import java.sql.Date; import java.sql.Timestamp; import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; import java.util.Properties; import java.util.Random; @@ -50,6 +48,7 @@ import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; import org.apache.hadoop.hive.serde2.fast.DeserializeRead; +import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource; import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead; @@ -62,7 +61,6 @@ import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.hive.serde2.fast.SerializeWrite; import org.apache.hadoop.io.BooleanWritable; @@ -86,7 +84,7 @@ LAZY_SIMPLE } - void deserializeAndVerify(Output output, DeserializeRead deserializeRead, + void deserializeAndVerify(Output output, DeserializeRead deserializeRead, RandomRowObjectSource source, Object[] expectedRow) throws HiveException, IOException { deserializeRead.set(output.getData(), 0, output.getLength()); @@ -523,7 +521,7 @@ private Properties createProperties(String fieldNames, String fieldTypes) { // Set the configuration parameters tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); - + tbl.setProperty("columns", fieldNames); tbl.setProperty("columns.types", fieldTypes); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java new file mode 100644 index 0000000..3a23584 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java @@ -0,0 +1,721 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Random; +import java.util.TreeMap; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.WritableComparator; + +import com.google.common.base.Preconditions; + +import static org.junit.Assert.*; + +public class CheckFastHashTable { + + public static boolean findMatch(byte[] valueBytes, List actualValues, int actualCount, boolean[] taken) { + for (int i = 0; i < actualCount; i++) { + if (!taken[i]) { + byte[] actualBytes = actualValues.get(i); + if (StringExpr.compare(valueBytes, 0, valueBytes.length, actualBytes, 0, actualBytes.length) == 0) { + taken[i] = true; + return true; + } + } + } + return false; + } + + public static void verifyHashMapValues(VectorMapJoinHashMapResult hashMapResult, + List values) { + + int valueCount = values.size(); + + WriteBuffers.ByteSegmentRef ref = hashMapResult.first(); + + // Read through all values. + List actualValues = new ArrayList(); + while (true) { + byte[] bytes = ref.getBytes(); + int offset = (int) ref.getOffset(); + int length = ref.getLength(); + + if (length == 0) { + actualValues.add(new byte[0]); + } else { + actualValues.add(Arrays.copyOfRange(bytes, offset, offset + length)); + } + ref = hashMapResult.next(); + if (ref == null) { + break; + } + } + + int actualCount = actualValues.size(); + + if (valueCount != actualCount) { + TestCase.fail("values.size() " + valueCount + " does not match actualCount " + actualCount); + } + + boolean[] taken = new boolean[actualCount]; + + for (int i = 0; i < actualCount; i++) { + byte[] valueBytes = values.get(i); + + if (!findMatch(valueBytes, actualValues, actualCount, taken)) { + List availableLengths = new ArrayList(); + for (int a = 0; a < actualCount; a++) { + if (!taken[a]) { + availableLengths.add(actualValues.get(a).length); + } + } + TestCase.fail("No match for actual value (valueBytes length " + valueBytes.length + + ", availableLengths " + availableLengths.toString() + " of " + actualCount + " total)"); + } + } + } + + /* + * Element for Key: Long x Hash Table: HashMap + */ + public static class FastLongHashMapElement { + private long key; + private List values; + + public FastLongHashMapElement(long key, byte[] firstValue) { + this.key = key; + values = new ArrayList(); + values.add(firstValue); + } + + public long getKey() { + return key; + } + + public int getValueCount() { + return values.size(); + } + + public List getValues() { + return values; + } + + public void addValue(byte[] value) { + values.add(value); + } + } + + /* + * Verify table for Key: Long x Hash Table: HashMap + */ + public static class VerifyFastLongHashMap { + + private int count; + + private FastLongHashMapElement[] array; + + private HashMap keyValueMap; + + public VerifyFastLongHashMap() { + count = 0; + array = new FastLongHashMapElement[50]; + keyValueMap = new HashMap(); + } + + public int 
getCount() { + return count; + } + + public boolean contains(long key) { + return keyValueMap.containsKey(key); + } + + public void add(long key, byte[] value) { + if (keyValueMap.containsKey(key)) { + int index = keyValueMap.get(key); + array[index].addValue(value); + } else { + if (count >= array.length) { + // Grow. + FastLongHashMapElement[] newArray = new FastLongHashMapElement[array.length * 2]; + System.arraycopy(array, 0, newArray, 0, count); + array = newArray; + } + array[count] = new FastLongHashMapElement(key, value); + keyValueMap.put(key, count); + count++; + } + } + + public long addRandomExisting(byte[] value, Random r) { + Preconditions.checkState(count > 0); + int index = r.nextInt(count); + array[index].addValue(value); + return array[index].getKey(); + } + + public long getKey(int index) { + return array[index].getKey(); + } + + public List getValues(int index) { + return array[index].getValues(); + } + + public void verify(VectorMapJoinFastLongHashMap map) { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + for (int index = 0; index < count; index++) { + FastLongHashMapElement element = array[index]; + long key = element.getKey(); + List values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + verifyHashMapValues(hashMapResult, values); + } + } + } + + /* + * Element for Key: byte[] x Hash Table: HashMap + */ + public static class FastBytesHashMapElement { + private byte[] key; + private List values; + + public FastBytesHashMapElement(byte[] key, byte[] firstValue) { + this.key = key; + values = new ArrayList(); + values.add(firstValue); + } + + public byte[] getKey() { + return key; + } + + public int getValueCount() { + return values.size(); + } + + public List getValues() { + return values; + } + + public void addValue(byte[] value) { + values.add(value); + } + } + + /* + * Verify table for Key: byte[] x Hash Table: HashMap + */ + public static class VerifyFastBytesHashMap { + + private int count; + + private FastBytesHashMapElement[] array; + + private TreeMap keyValueMap; + + public VerifyFastBytesHashMap() { + count = 0; + array = new FastBytesHashMapElement[50]; + + // We use BytesWritable because it supports Comparable for our TreeMap. + keyValueMap = new TreeMap(); + } + + public int getCount() { + return count; + } + + public boolean contains(byte[] key) { + BytesWritable keyBytesWritable = new BytesWritable(key, key.length); + return keyValueMap.containsKey(keyBytesWritable); + } + + public void add(byte[] key, byte[] value) { + BytesWritable keyBytesWritable = new BytesWritable(key, key.length); + if (keyValueMap.containsKey(keyBytesWritable)) { + int index = keyValueMap.get(keyBytesWritable); + array[index].addValue(value); + } else { + if (count >= array.length) { + // Grow. 
+ FastBytesHashMapElement[] newArray = new FastBytesHashMapElement[array.length * 2]; + System.arraycopy(array, 0, newArray, 0, count); + array = newArray; + } + array[count] = new FastBytesHashMapElement(key, value); + keyValueMap.put(keyBytesWritable, count); + count++; + } + } + + public byte[] addRandomExisting(byte[] value, Random r) { + Preconditions.checkState(count > 0); + int index = r.nextInt(count); + array[index].addValue(value); + return array[index].getKey(); + } + + public byte[] getKey(int index) { + return array[index].getKey(); + } + + public List getValues(int index) { + return array[index].getValues(); + } + + public void verify(VectorMapJoinFastBytesHashMap map) { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + for (int index = 0; index < count; index++) { + FastBytesHashMapElement element = array[index]; + byte[] key = element.getKey(); + List values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + verifyHashMapValues(hashMapResult, values); + } + } + } + + /* + * Element for Key: Long x Hash Table: HashMultiSet + */ + public static class FastLongHashMultiSetElement { + private long key; + private int multiSetCount; + + public FastLongHashMultiSetElement(long key) { + this.key = key; + multiSetCount = 1; + } + + public long getKey() { + return key; + } + + public int getMultiSetCount() { + return multiSetCount; + } + + public void incrementMultiSetCount() { + multiSetCount++; + } + } + + /* + * Verify table for Key: Long x Hash Table: HashMultiSet + */ + public static class VerifyFastLongHashMultiSet { + + private int count; + + private FastLongHashMultiSetElement[] array; + + private HashMap keyValueMap; + + public VerifyFastLongHashMultiSet() { + count = 0; + array = new FastLongHashMultiSetElement[50]; + keyValueMap = new HashMap(); + } + + public int getCount() { + return count; + } + + public boolean contains(long key) { + return keyValueMap.containsKey(key); + } + + public void add(long key) { + if (keyValueMap.containsKey(key)) { + int index = keyValueMap.get(key); + array[index].incrementMultiSetCount(); + } else { + if (count >= array.length) { + // Grow. 
+ FastLongHashMultiSetElement[] newArray = new FastLongHashMultiSetElement[array.length * 2]; + System.arraycopy(array, 0, newArray, 0, count); + array = newArray; + } + array[count] = new FastLongHashMultiSetElement(key); + keyValueMap.put(key, count); + count++; + } + } + + public long addRandomExisting(byte[] value, Random r) { + Preconditions.checkState(count > 0); + int index = r.nextInt(count); + array[index].incrementMultiSetCount(); + return array[index].getKey(); + } + + public long getKey(int index) { + return array[index].getKey(); + } + + public int getMultiSetCount(int index) { + return array[index].getMultiSetCount(); + } + + public void verify(VectorMapJoinFastLongHashMultiSet map) { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + for (int index = 0; index < count; index++) { + FastLongHashMultiSetElement element = array[index]; + long key = element.getKey(); + int multiSetCount = element.getMultiSetCount(); + + VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key, hashMultiSetResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + assertEquals(hashMultiSetResult.count(), multiSetCount); + } + } + } + + /* + * Element for Key: byte[] x Hash Table: HashMultiSet + */ + public static class FastBytesHashMultiSetElement { + private byte[] key; + private int multiSetCount; + + public FastBytesHashMultiSetElement(byte[] key) { + this.key = key; + multiSetCount = 1; + } + + public byte[] getKey() { + return key; + } + + public int getMultiSetCount() { + return multiSetCount; + } + + public void incrementMultiSetCount() { + multiSetCount++; + } + } + + /* + * Verify table for Key: byte[] x Hash Table: HashMultiSet + */ + public static class VerifyFastBytesHashMultiSet { + + private int count; + + private FastBytesHashMultiSetElement[] array; + + private TreeMap keyValueMap; + + public VerifyFastBytesHashMultiSet() { + count = 0; + array = new FastBytesHashMultiSetElement[50]; + + // We use BytesWritable because it supports Comparable for our TreeMap. + keyValueMap = new TreeMap(); + } + + public int getCount() { + return count; + } + + public boolean contains(byte[] key) { + BytesWritable keyBytesWritable = new BytesWritable(key, key.length); + return keyValueMap.containsKey(keyBytesWritable); + } + + public void add(byte[] key) { + BytesWritable keyBytesWritable = new BytesWritable(key, key.length); + if (keyValueMap.containsKey(keyBytesWritable)) { + int index = keyValueMap.get(keyBytesWritable); + array[index].incrementMultiSetCount(); + } else { + if (count >= array.length) { + // Grow. 
+ FastBytesHashMultiSetElement[] newArray = new FastBytesHashMultiSetElement[array.length * 2]; + System.arraycopy(array, 0, newArray, 0, count); + array = newArray; + } + array[count] = new FastBytesHashMultiSetElement(key); + keyValueMap.put(keyBytesWritable, count); + count++; + } + } + + public byte[] addRandomExisting(byte[] value, Random r) { + Preconditions.checkState(count > 0); + int index = r.nextInt(count); + array[index].incrementMultiSetCount(); + return array[index].getKey(); + } + + public byte[] getKey(int index) { + return array[index].getKey(); + } + + public int getMultiSetCount(int index) { + return array[index].getMultiSetCount(); + } + + public void verify(VectorMapJoinFastBytesHashMultiSet map) { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + for (int index = 0; index < count; index++) { + FastBytesHashMultiSetElement element = array[index]; + byte[] key = element.getKey(); + int multiSetCount = element.getMultiSetCount(); + + VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key, 0, key.length, hashMultiSetResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + assertEquals(hashMultiSetResult.count(), multiSetCount); + } + } + } + + /* + * Element for Key: Long x Hash Table: HashSet + */ + public static class FastLongHashSetElement { + private long key; + + public FastLongHashSetElement(long key) { + this.key = key; + } + + public long getKey() { + return key; + } + } + + /* + * Verify table for Key: Long x Hash Table: HashSet + */ + public static class VerifyFastLongHashSet { + + private int count; + + private FastLongHashSetElement[] array; + + private HashMap keyValueMap; + + public VerifyFastLongHashSet() { + count = 0; + array = new FastLongHashSetElement[50]; + keyValueMap = new HashMap(); + } + + public int getCount() { + return count; + } + + public boolean contains(long key) { + return keyValueMap.containsKey(key); + } + + public void add(long key) { + if (keyValueMap.containsKey(key)) { + // Already exists. + } else { + if (count >= array.length) { + // Grow. + FastLongHashSetElement[] newArray = new FastLongHashSetElement[array.length * 2]; + System.arraycopy(array, 0, newArray, 0, count); + array = newArray; + } + array[count] = new FastLongHashSetElement(key); + keyValueMap.put(key, count); + count++; + } + } + + public long addRandomExisting(byte[] value, Random r) { + Preconditions.checkState(count > 0); + int index = r.nextInt(count); + + // Already exists.
+ + return array[index].getKey(); + } + + public long getKey(int index) { + return array[index].getKey(); + } + + public void verify(VectorMapJoinFastLongHashSet map) { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + for (int index = 0; index < count; index++) { + FastLongHashSetElement element = array[index]; + long key = element.getKey(); + + VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key, hashSetResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + } + } + } + + /* + * Element for Key: byte[] x Hash Table: HashSet + */ + public static class FastBytesHashSetElement { + private byte[] key; + + public FastBytesHashSetElement(byte[] key) { + this.key = key; + } + + public byte[] getKey() { + return key; + } + } + + /* + * Verify table for Key: byte[] x Hash Table: HashSet + */ + public static class VerifyFastBytesHashSet { + + private int count; + + private FastBytesHashSetElement[] array; + + private TreeMap keyValueMap; + + public VerifyFastBytesHashSet() { + count = 0; + array = new FastBytesHashSetElement[50]; + + // We use BytesWritable because it supports Comparable for our TreeMap. + keyValueMap = new TreeMap(); + } + + public int getCount() { + return count; + } + + public boolean contains(byte[] key) { + BytesWritable keyBytesWritable = new BytesWritable(key, key.length); + return keyValueMap.containsKey(keyBytesWritable); + } + + public void add(byte[] key) { + BytesWritable keyBytesWritable = new BytesWritable(key, key.length); + if (keyValueMap.containsKey(keyBytesWritable)) { + // Already exists. + } else { + if (count >= array.length) { + // Grow. + FastBytesHashSetElement[] newArray = new FastBytesHashSetElement[array.length * 2]; + System.arraycopy(array, 0, newArray, 0, count); + array = newArray; + } + array[count] = new FastBytesHashSetElement(key); + keyValueMap.put(keyBytesWritable, count); + count++; + } + } + + public byte[] addRandomExisting(byte[] value, Random r) { + Preconditions.checkState(count > 0); + int index = r.nextInt(count); + + // Already exists. 
+ + return array[index].getKey(); + } + + public byte[] getKey(int index) { + return array[index].getKey(); + } + + public void verify(VectorMapJoinFastBytesHashSet map) { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + for (int index = 0; index < count; index++) { + FastBytesHashSetElement element = array[index]; + byte[] key = element.getKey(); + + VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key, 0, key.length, hashSetResult); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + } + } + } +} \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java index c2375e0..90e8f33 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CommonFastHashTable.java @@ -18,16 +18,8 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; import java.util.Random; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; -import org.apache.hadoop.hive.serde2.WriteBuffers; - -import static org.junit.Assert.*; - public class CommonFastHashTable { protected static final float LOAD_FACTOR = 0.75f; @@ -39,6 +31,10 @@ protected static final int LARGE_CAPACITY = 8388608; protected static Random random; + protected static int MAX_KEY_LENGTH = 100; + + protected static int MAX_VALUE_LENGTH = 1000; + public static int generateLargeCount() { int count = 0; if (random.nextInt(100) != 0) { @@ -75,54 +71,4 @@ public static int generateLargeCount() { } return count; } - public static void verifyHashMapResult(VectorMapJoinHashMapResult hashMapResult, - RandomByteArrayStream randomByteArrayStream ) { - - List resultBytes = new ArrayList(); - int count = 0; - if (hashMapResult.hasRows()) { - WriteBuffers.ByteSegmentRef ref = hashMapResult.first(); - while (ref != null) { - count++; - byte[] bytes = ref.getBytes(); - int offset = (int) ref.getOffset(); - int length = ref.getLength(); - resultBytes.add(Arrays.copyOfRange(bytes, offset, offset + length)); - ref = hashMapResult.next(); - } - } else { - assertTrue(hashMapResult.isEof()); - } - if (randomByteArrayStream.size() != count) { - assertTrue(false); - } - - for (int i = 0; i < count; ++i) { - byte[] bytes = resultBytes.get(i); - if (!randomByteArrayStream.contains(bytes)) { - assertTrue(false); - } - } - } - - public static void verifyHashMapResult(VectorMapJoinHashMapResult hashMapResult, - byte[] valueBytes ) { - - assertTrue(hashMapResult.hasRows()); - WriteBuffers.ByteSegmentRef ref = hashMapResult.first(); - byte[] bytes = ref.getBytes(); - int offset = (int) ref.getOffset(); - int length = ref.getLength(); - assertTrue(valueBytes.length == length); - boolean match = true; // Assume - for (int j = 0; j < length; j++) { - if (valueBytes[j] != bytes[offset + j]) { - match = false; - break; - } - } - if (!match) { - assertTrue(false); - } - } } \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java new file mode 100644 index 
0000000..bbfa65f --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMap.java @@ -0,0 +1,272 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Test; + +/* + * Tests the multi-key (bytes) hash map optimized for vector map join. + * + * The key is uninterpreted bytes. + */ +public class TestVectorMapJoinFastBytesHashMap extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(82733); + + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); + + // Second value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); + + // Third value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(29383); + + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test.
+ break; + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); + } + } + + @Test + public void testGetNonExistent() throws Exception { + random = new Random(1002); + + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + byte[] key1 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key1); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key1, value); + verifyTable.add(key1, value); + verifyTable.verify(map); + + byte[] key2 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key2); + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(key2, 0, key2.length, hashMapResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + assertTrue(!hashMapResult.hasRows()); + + map.testPutRow(key2, value); + verifyTable.add(key2, value); + verifyTable.verify(map); + + byte[] key3 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key3); + hashMapResult = map.createHashMapResult(); + joinResult = map.lookup(key3, 0, key3.length, hashMapResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + assertTrue(!hashMapResult.hasRows()); + } + + @Test + public void testFullMap() throws Exception { + random = new Random(200001); + + // Make sure the map does not expand; should be able to find space. + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap(false,CAPACITY, 1f, WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + for (int i = 0; i < CAPACITY; i++) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + // verifyTable.verify(map); + } + verifyTable.verify(map); + + byte[] anotherKey; + while (true) { + anotherKey = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(anotherKey); + if (!verifyTable.contains(anotherKey)) { + // Unique keys for this test. + break; + } + } + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(anotherKey, 0, anotherKey.length, hashMapResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + } + + @Test + public void testExpand() throws Exception { + random = new Random(99221); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap(false,1, 0.0000001f, WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + for (int i = 0; i < 18; ++i) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + // verifyTable.verify(map); + } + verifyTable.verify(map); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value = new byte[generateLargeCount() - 1]; + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key, value); + verifyTable.add(key, value); + // verifyTable.verify(map); + } else { + byte[] randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey, value); + // verifyTable.verify(map); + } + } + verifyTable.verify(map); + } + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(9332); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(21111); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java new file mode 100644 index 0000000..449a8b2 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMultiSet.java @@ -0,0 +1,253 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMultiSet; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Test; + +public class TestVectorMapJoinFastBytesHashMultiSet extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(5255); + + VectorMapJoinFastMultiKeyHashMultiSet map = + new VectorMapJoinFastMultiKeyHashMultiSet( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); + + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Second time. + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Third time. + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(2374); + + VectorMapJoinFastMultiKeyHashMultiSet map = + new VectorMapJoinFastMultiKeyHashMultiSet( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + } + + @Test + public void testGetNonExistent() throws Exception { + random = new Random(98222); + + VectorMapJoinFastMultiKeyHashMultiSet map = + new VectorMapJoinFastMultiKeyHashMultiSet( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); + + byte[] key1 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key1); + + map.testPutRow(key1); + verifyTable.add(key1); + verifyTable.verify(map); + + byte[] key2 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key2); + VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key2, 0, key2.length, hashMultiSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + + map.testPutRow(key2); + verifyTable.add(key2); + verifyTable.verify(map); + + byte[] key3 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key3); + hashMultiSetResult = map.createHashMultiSetResult(); + joinResult = map.contains(key3, 0, key3.length, hashMultiSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + assertEquals(hashMultiSetResult.count(), 0); + } + + @Test + public void testFullMap() throws Exception { + random = new Random(9024); + + // Make sure the map does not expand; should be able to find space. 
+ VectorMapJoinFastMultiKeyHashMultiSet map = + new VectorMapJoinFastMultiKeyHashMultiSet(false,CAPACITY, 1f, WB_SIZE); + + VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); + + for (int i = 0; i < CAPACITY; i++) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + + byte[] anotherKey; + while (true) { + anotherKey = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(anotherKey); + if (!verifyTable.contains(anotherKey)) { + // Unique keys for this test. + break; + } + } + + VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult(); + JoinUtil.JoinResult joinResult = map.contains(anotherKey, 0, anotherKey.length, hashMultiSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + } + + @Test + public void testExpand() throws Exception { + random = new Random(2933); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastMultiKeyHashMultiSet map = + new VectorMapJoinFastMultiKeyHashMultiSet(false,1, 0.0000001f, WB_SIZE); + + VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); + + for (int i = 0; i < 18; ++i) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashMultiSet map, VerifyFastBytesHashMultiSet verifyTable) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value = new byte[generateLargeCount() - 1]; + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } else { + byte[] randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey); + // verifyTable.verify(map); + } + } + verifyTable.verify(map); + } + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(5445); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastMultiKeyHashMultiSet map = + new VectorMapJoinFastMultiKeyHashMultiSet( + false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); + + VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(5637); + + // Use a large capacity that doesn't require expansion, yet. 
+ VectorMapJoinFastMultiKeyHashMultiSet map = + new VectorMapJoinFastMultiKeyHashMultiSet( + false,MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); + + VerifyFastBytesHashMultiSet verifyTable = new VerifyFastBytesHashMultiSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java new file mode 100644 index 0000000..ef7c91c --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java @@ -0,0 +1,252 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashSet; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.junit.Test; + +public class TestVectorMapJoinFastBytesHashSet extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(81104); + + VectorMapJoinFastMultiKeyHashSet map = + new VectorMapJoinFastMultiKeyHashSet( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); + + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Second time. + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Third time. + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(1120); + + VectorMapJoinFastMultiKeyHashSet map = + new VectorMapJoinFastMultiKeyHashSet( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + } + + @Test + public void testGetNonExistent() throws Exception { + random = new Random(2293); + + VectorMapJoinFastMultiKeyHashSet map = + new VectorMapJoinFastMultiKeyHashSet( + false,CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); + + byte[] key1 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key1); + + map.testPutRow(key1); + verifyTable.add(key1); + verifyTable.verify(map); + + byte[] key2 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key2); + VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key2, 0, key2.length, hashSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + + map.testPutRow(key2); + verifyTable.add(key2); + verifyTable.verify(map); + + byte[] key3 = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key3); + hashSetResult = map.createHashSetResult(); + joinResult = map.contains(key3, 0, key3.length, hashSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + } + + @Test + public void testFullMap() throws Exception { + random = new Random(219); + + // Make sure the map does not expand; should be able to find space. + VectorMapJoinFastMultiKeyHashSet map = + new VectorMapJoinFastMultiKeyHashSet(false,CAPACITY, 1f, WB_SIZE); + + VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); + + for (int i = 0; i < CAPACITY; i++) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + + byte[] anotherKey; + while (true) { + anotherKey = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(anotherKey); + if (!verifyTable.contains(anotherKey)) { + // Unique keys for this test. + break; + } + } + + VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult(); + JoinUtil.JoinResult joinResult = map.contains(anotherKey, 0, anotherKey.length, hashSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + } + + @Test + public void testExpand() throws Exception { + random = new Random(773); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastMultiKeyHashSet map = + new VectorMapJoinFastMultiKeyHashSet(false,1, 0.0000001f, WB_SIZE); + + VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); + + for (int i = 0; i < 18; ++i) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastMultiKeyHashSet map, VerifyFastBytesHashSet verifyTable) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value = new byte[generateLargeCount() - 1]; + random.nextBytes(value); + + // Add a new key or add a value to an existing key? 
+ if (random.nextBoolean() || verifyTable.getCount() == 0) { + byte[] key; + while (true) { + key = new byte[random.nextInt(MAX_KEY_LENGTH)]; + random.nextBytes(key); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } else { + byte[] randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey); + // verifyTable.verify(map); + } + } + verifyTable.verify(map); + } + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(9); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastMultiKeyHashSet map = + new VectorMapJoinFastMultiKeyHashSet( + false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); + + VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(8462); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastMultiKeyHashSet map = + new VectorMapJoinFastMultiKeyHashSet( + false,MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); + + VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java index a45275b..e8bbee3 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMap.java @@ -18,11 +18,14 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; +import java.io.IOException; import java.util.Random; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastLongHashMap; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.junit.Test; @@ -31,91 +34,141 @@ public class TestVectorMapJoinFastLongHashMap extends CommonFastHashTable { @Test - public void testPutGetOne() throws Exception { - random = new Random(47496); + public void testOneKey() throws Exception { + random = new Random(33221); VectorMapJoinFastLongHashMap map = - new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); - - RandomLongStream randomLongKeyStream = new RandomLongStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - - long key = randomLongKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, randomByteArrayValueStream.get(0)); - - key = randomLongKeyStream.next(); - value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, randomByteArrayValueStream.get(1)); + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, 
WB_SIZE); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + long key = random.nextLong(); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); + + // Second value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); + + // Third value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); } @Test - public void testPutGetMultiple() throws Exception { - random = new Random(2990); + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(900); - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); - - RandomLongStream randomLongKeyStream = new RandomLongStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - - long key = randomLongKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, value); + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); - // Same key, multiple values. 
- for (int i = 0; i < 3; ++i) { - value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, randomByteArrayValueStream); + map.testPutRow(key, value); + verifyTable.add(key, value); + // verifyTable.verify(map); } + verifyTable.verify(map); } @Test public void testGetNonExistent() throws Exception { - random = new Random(16916); + random = new Random(450); - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); - RandomLongStream randomLongKeyStream = new RandomLongStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); - long key = randomLongKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); + long key1 = random.nextLong(); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); - key += 1; - map.putRow(key, value); + map.testPutRow(key1, value); + verifyTable.add(key1, value); + verifyTable.verify(map); - key += 1; + long key2 = key1 += 1; VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult); + JoinUtil.JoinResult joinResult = map.lookup(key2, hashMapResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + assertTrue(!hashMapResult.hasRows()); + + map.testPutRow(key2, value); + verifyTable.add(key2, value); + verifyTable.verify(map); + + long key3 = key2 += 1; + hashMapResult = map.createHashMapResult(); + joinResult = map.lookup(key3, hashMapResult); assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); assertTrue(!hashMapResult.hasRows()); } @Test - public void testPutWithFullMap() throws Exception { - random = new Random(26078); + public void testFullMap() throws Exception { + random = new Random(93440); // Make sure the map does not expand; should be able to find space. - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE); - - RandomLongStream randomLongKeyStream = new RandomLongStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - for (int i = 0; i < CAPACITY; ++i) { - long key = randomLongKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + for (int i = 0; i < CAPACITY; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + // verifyTable.verify(map); } - for (int i = 0; i < randomLongKeyStream.size(); ++i) { - verifyHashMapResult(map, randomLongKeyStream.get(i), randomByteArrayValueStream.get(i)); + verifyTable.verify(map); + + long anotherKey; + while (true) { + anotherKey = random.nextLong(); + if (!verifyTable.contains(anotherKey)) { + // Unique keys for this test. 
+ break; + } } - // assertEquals(CAPACITY, map.getCapacity()); - // Get of non-existent key should terminate.. - long anotherKey = randomLongKeyStream.next(); + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); JoinUtil.JoinResult joinResult = map.lookup(anotherKey, hashMapResult); assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); @@ -123,97 +176,91 @@ public void testPutWithFullMap() throws Exception { @Test public void testExpand() throws Exception { - random = new Random(22470); + random = new Random(5227); // Start with capacity 1; make sure we expand on every put. - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE); + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE); - RandomLongStream randomLongKeyStream = new RandomLongStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); for (int i = 0; i < 18; ++i) { - long key = randomLongKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - for (int j = 0; j <= i; ++j) { - verifyHashMapResult(map, randomLongKeyStream.get(j), randomByteArrayValueStream.get(j)); + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + // verifyTable.verify(map); } + verifyTable.verify(map); // assertEquals(1 << 18, map.getCapacity()); } - @Test - public void testLarge() throws Exception { - random = new Random(40719); - - // Use a large capacity that doesn't require expansion, yet. - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); - - RandomLongStream randomLongKeyStream = new RandomLongStream(random); - - final int largeSize = 1000; - RandomByteArrayStream[] randomByteArrayValueStreams = new RandomByteArrayStream[largeSize]; - for (int i = 0; i < largeSize; i++) { - randomByteArrayValueStreams[i] = new RandomByteArrayStream(random); - int count = generateLargeCount(); - long key = randomLongKeyStream.next(); - for (int v = 0; v < count; v++) { - byte[] value = randomByteArrayValueStreams[i].next(); - map.putRow(key, value); + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashMap map, VerifyFastLongHashMap verifyTable) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value = new byte[generateLargeCount() - 1]; + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + } + + map.testPutRow(key, value); + verifyTable.add(key, value); + verifyTable.verify(map); + } else { + long randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey, value); + // verifyTable.verify(map); } - } - for (int i = 0; i < largeSize; i++) { - verifyHashMapResult(map, randomLongKeyStream.get(i), randomByteArrayValueStreams[i]); + verifyTable.verify(map); } } - @Test - public void testLargeAndExpand() throws Exception { - random = new Random(46809); + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(8); // Use a large capacity that doesn't require expansion, yet. - VectorMapJoinFastLongHashMap map = new VectorMapJoinFastLongHashMap(false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); - - RandomLongStream randomLongKeyStream = new RandomLongStream(random); - - final int largeSize = 1000; - RandomByteArrayStream[] randomByteArrayValueStreams = new RandomByteArrayStream[largeSize]; - for (int i = 0; i < largeSize; i++) { - randomByteArrayValueStreams[i] = new RandomByteArrayStream(random); - int count = generateLargeCount(); - long key = randomLongKeyStream.next(); - for (int v = 0; v < count; v++) { - byte[] value = randomByteArrayValueStreams[i].next(); - map.putRow(key, value); - } - } - for (int i = 0; i < largeSize; i++) { - verifyHashMapResult(map, randomLongKeyStream.get(i), randomByteArrayValueStreams[i]); - } - } - - private void verifyHashMapResult(VectorMapJoinFastLongHashMap map, long key, - RandomByteArrayStream randomByteArrayValueStream) { + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); - } + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); - CommonFastHashTable.verifyHashMapResult(hashMapResult, randomByteArrayValueStream); + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); } - private void verifyHashMapResult(VectorMapJoinFastLongHashMap map, long key, - byte[] valueBytes) { + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(20); - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); - } + // Use a large capacity that doesn't require expansion, yet. 
+ VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); - CommonFastHashTable.verifyHashMapResult(hashMapResult, valueBytes); - } + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java new file mode 100644 index 0000000..9e94611 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMultiSet.java @@ -0,0 +1,252 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashMultiSet; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastLongHashMultiSet; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class TestVectorMapJoinFastLongHashMultiSet extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(458); + + VectorMapJoinFastLongHashMultiSet map = + new VectorMapJoinFastLongHashMultiSet( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); + + long key = random.nextLong(); + + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Second time. + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Third time. + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(8000); + + VectorMapJoinFastLongHashMultiSet map = + new VectorMapJoinFastLongHashMultiSet( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + } + + @Test + public void testGetNonExistent() throws Exception { + random = new Random(4000); + + VectorMapJoinFastLongHashMultiSet map = + new VectorMapJoinFastLongHashMultiSet( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); + + long key1 = random.nextLong(); + + map.testPutRow(key1); + verifyTable.add(key1); + verifyTable.verify(map); + + long key2 = key1 += 1; + VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key2, hashMultiSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + assertEquals(hashMultiSetResult.count(), 0); + + map.testPutRow(key2); + verifyTable.add(key2); + verifyTable.verify(map); + + long key3 = key2 += 1; + hashMultiSetResult = map.createHashMultiSetResult(); + joinResult = map.contains(key3, hashMultiSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + assertEquals(hashMultiSetResult.count(), 0); + } + + @Test + public void testFullMap() throws Exception { + random = new Random(25000); + + // Make sure the map does not expand; should be able to find space. + VectorMapJoinFastLongHashMultiSet map = + new VectorMapJoinFastLongHashMultiSet( + false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE); + + VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); + + for (int i = 0; i < CAPACITY; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + + long anotherKey; + while (true) { + anotherKey = random.nextLong(); + if (!verifyTable.contains(anotherKey)) { + // Unique keys for this test. + break; + } + } + + VectorMapJoinHashMultiSetResult hashMultiSetResult = map.createHashMultiSetResult(); + JoinUtil.JoinResult joinResult = map.contains(anotherKey, hashMultiSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + } + + @Test + public void testExpand() throws Exception { + random = new Random(30000); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastLongHashMultiSet map = + new VectorMapJoinFastLongHashMultiSet( + false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE); + + VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); + + for (int i = 0; i < 18; ++i) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashMultiSet map, VerifyFastLongHashMultiSet verifyTable) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value = new byte[generateLargeCount() - 1]; + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + } else { + long randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey); + // verifyTable.verify(map); + } + verifyTable.verify(map); + } + } + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(333); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastLongHashMultiSet map = + new VectorMapJoinFastLongHashMultiSet( + false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); + + VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(790); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastLongHashMultiSet map = + new VectorMapJoinFastLongHashMultiSet( + false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); + + VerifyFastLongHashMultiSet verifyTable = new VerifyFastLongHashMultiSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java new file mode 100644 index 0000000..698bcdc --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java @@ -0,0 +1,250 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashSet; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastLongHashSet; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class TestVectorMapJoinFastLongHashSet extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(4186); + + VectorMapJoinFastLongHashSet map = + new VectorMapJoinFastLongHashSet( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); + + long key = random.nextLong(); + + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Second time. + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + + // Third time. + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(1412); + + VectorMapJoinFastLongHashSet map = + new VectorMapJoinFastLongHashSet( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + } + + @Test + public void testGetNonExistent() throws Exception { + random = new Random(100); + + VectorMapJoinFastLongHashSet map = + new VectorMapJoinFastLongHashSet( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE); + + VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); + + long key1 = random.nextLong(); + + map.testPutRow(key1); + verifyTable.add(key1); + verifyTable.verify(map); + + long key2 = key1 += 1; + VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult(); + JoinUtil.JoinResult joinResult = map.contains(key2, hashSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + + map.testPutRow(key2); + verifyTable.add(key2); + verifyTable.verify(map); + + long key3 = key2 += 1; + hashSetResult = map.createHashSetResult(); + joinResult = map.contains(key3, hashSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + } + + @Test + public void testFullMap() throws Exception { + random = new Random(2520); + + // Make sure the map does not expand; should be able to find space. + VectorMapJoinFastLongHashSet map = + new VectorMapJoinFastLongHashSet( + false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE); + + VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); + + for (int i = 0; i < CAPACITY; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. 
+ break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + + long anotherKey; + while (true) { + anotherKey = random.nextLong(); + if (!verifyTable.contains(anotherKey)) { + // Unique keys for this test. + break; + } + } + + VectorMapJoinHashSetResult hashSetResult = map.createHashSetResult(); + JoinUtil.JoinResult joinResult = map.contains(anotherKey, hashSetResult); + assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); + } + + @Test + public void testExpand() throws Exception { + random = new Random(348); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastLongHashSet map = + new VectorMapJoinFastLongHashSet( + false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE); + + VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); + + for (int i = 0; i < 18; ++i) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + // verifyTable.verify(map); + } + verifyTable.verify(map); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashSet map, VerifyFastLongHashSet verifyTable) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value = new byte[generateLargeCount() - 1]; + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key); + verifyTable.add(key); + verifyTable.verify(map); + } else { + long randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey); + // verifyTable.verify(map); + } + verifyTable.verify(map); + } + } + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(7778); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastLongHashSet map = + new VectorMapJoinFastLongHashSet( + false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); + + VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(56); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastLongHashSet map = + new VectorMapJoinFastLongHashSet( + false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); + + VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); + + int keyCount = 1000; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java deleted file mode 100644 index 944bda6..0000000 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastMultiKeyHashMap.java +++ /dev/null @@ -1,231 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; - -import java.util.Random; - -import org.apache.hadoop.hive.ql.exec.JoinUtil; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; -import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; -import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; -import org.junit.Test; - -import static org.junit.Assert.*; - -public class TestVectorMapJoinFastMultiKeyHashMap extends CommonFastHashTable { - - @Test - public void testPutGetOne() throws Exception { - random = new Random(47496); - - VectorMapJoinFastMultiKeyHashMap map = - new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE); - - RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - - byte[] key = randomByteArrayKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, randomByteArrayValueStream.get(0)); - - key = randomByteArrayKeyStream.next(); - value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, randomByteArrayValueStream.get(1)); - } - - @Test - public void testPutGetMultiple() throws Exception { - random = new Random(2990); - - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE); - - RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - - byte[] key = randomByteArrayKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, value); - - // Same key, multiple values. 
- for (int i = 0; i < 3; ++i) { - value = randomByteArrayValueStream.next(); - map.putRow(key, value); - verifyHashMapResult(map, key, randomByteArrayValueStream); - } - } - - @Test - public void testGetNonExistent() throws Exception { - random = new Random(16916); - - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, LOAD_FACTOR, WB_SIZE); - - RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - - byte[] key = randomByteArrayKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - - key[0] = (byte) (key[0] + 1); - map.putRow(key, value); - - key[0] = (byte) (key[0] + 1); - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); - assertTrue(!hashMapResult.hasRows()); - } - - @Test - public void testPutWithFullMap() throws Exception { - random = new Random(26078); - - // Make sure the map does not expand; should be able to find space. - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, CAPACITY, 1f, WB_SIZE); - - RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - for (int i = 0; i < CAPACITY; ++i) { - byte[] key = randomByteArrayKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - } - for (int i = 0; i < randomByteArrayKeyStream.size(); ++i) { - verifyHashMapResult(map, randomByteArrayKeyStream.get(i), randomByteArrayValueStream.get(i)); - } - // assertEquals(CAPACITY, map.getCapacity()); - // Get of non-existent key should terminate.. - byte[] anotherKey = randomByteArrayKeyStream.next(); - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(anotherKey, 0, anotherKey.length, hashMapResult); - assertTrue(joinResult == JoinUtil.JoinResult.NOMATCH); - } - - @Test - public void testExpand() throws Exception { - random = new Random(22470); - - // Start with capacity 1; make sure we expand on every put. - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, 1, 0.0000001f, WB_SIZE); - - RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random); - RandomByteArrayStream randomByteArrayValueStream = new RandomByteArrayStream(random); - - for (int i = 0; i < 18; ++i) { - byte[] key = randomByteArrayKeyStream.next(); - byte[] value = randomByteArrayValueStream.next(); - map.putRow(key, value); - for (int j = 0; j <= i; ++j) { - verifyHashMapResult(map, randomByteArrayKeyStream.get(j), randomByteArrayValueStream.get(j)); - } - } - // assertEquals(1 << 18, map.getCapacity()); - } - - @Test - public void testLarge() throws Exception { - random = new Random(5231); - - // Use a large capacity that doesn't require expansion, yet. 
- VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE); - - RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random, 10); - - final int largeSize = 1000; - RandomByteArrayStream[] randomByteArrayValueStreams = new RandomByteArrayStream[largeSize]; - for (int i = 0; i < largeSize; i++) { - randomByteArrayValueStreams[i] = new RandomByteArrayStream(random); - int count = generateLargeCount(); - byte[] key = randomByteArrayKeyStream.next(); - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - if (joinResult == JoinUtil.JoinResult.MATCH) { - // A problem or need different random seed / longer key? - assertTrue(false); - } - for (int v = 0; v < count; v++) { - byte[] value = randomByteArrayValueStreams[i].next(); - map.putRow(key, value); - } - } - for (int i = 0; i < largeSize; i++) { - verifyHashMapResult(map, randomByteArrayKeyStream.get(i), randomByteArrayValueStreams[i]); - } - } - - @Test - public void testLargeAndExpand() throws Exception { - random = new Random(46809); - - // Use a large capacity that doesn't require expansion, yet. - VectorMapJoinFastMultiKeyHashMap map = new VectorMapJoinFastMultiKeyHashMap(false, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE); - - RandomByteArrayStream randomByteArrayKeyStream = new RandomByteArrayStream(random, 10); - - final int largeSize = 1000; - RandomByteArrayStream[] randomByteArrayValueStreams = new RandomByteArrayStream[largeSize]; - for (int i = 0; i < largeSize; i++) { - randomByteArrayValueStreams[i] = new RandomByteArrayStream(random); - int count = generateLargeCount(); - byte[] key = randomByteArrayKeyStream.next(); - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - if (joinResult == JoinUtil.JoinResult.MATCH) { - // A problem or need different random seed / longer key? 
- assertTrue(false); - } - for (int v = 0; v < count; v++) { - byte[] value = randomByteArrayValueStreams[i].next(); - map.putRow(key, value); - } - } - for (int i = 0; i < largeSize; i++) { - verifyHashMapResult(map, randomByteArrayKeyStream.get(i), randomByteArrayValueStreams[i]); - } - } - - private void verifyHashMapResult(VectorMapJoinFastMultiKeyHashMap map, byte[] key, - RandomByteArrayStream randomByteArrayValueStream) { - - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); - } - - CommonFastHashTable.verifyHashMapResult(hashMapResult, randomByteArrayValueStream); - } - - private void verifyHashMapResult(VectorMapJoinFastMultiKeyHashMap map, byte[] key, - byte[] valueBytes) { - - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); - } - - CommonFastHashTable.verifyHashMapResult(hashMapResult, valueBytes); - } - -} diff --git serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java new file mode 100644 index 0000000..1bb990c --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/fast/RandomRowObjectSource.java @@ -0,0 +1,423 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.serde2.fast; + +import java.sql.Date; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Random; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; +import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth; +import org.apache.hadoop.hive.common.type.HiveVarchar; +import org.apache.hadoop.hive.common.type.RandomTypeUtil; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hive.common.util.DateUtils; + +/** + * Generate object inspector and random row object[]. 
+ */ +public class RandomRowObjectSource { + + private Random r; + + private int columnCount; + + private List typeNames; + + private PrimitiveCategory[] primitiveCategories; + + private PrimitiveTypeInfo[] primitiveTypeInfos; + + private List primitiveObjectInspectorList; + + private StructObjectInspector rowStructObjectInspector; + + public List typeNames() { + return typeNames; + } + + public PrimitiveCategory[] primitiveCategories() { + return primitiveCategories; + } + + public PrimitiveTypeInfo[] primitiveTypeInfos() { + return primitiveTypeInfos; + } + + public StructObjectInspector rowStructObjectInspector() { + return rowStructObjectInspector; + } + + public StructObjectInspector partialRowStructObjectInspector(int partialFieldCount) { + ArrayList partialPrimitiveObjectInspectorList = + new ArrayList(partialFieldCount); + List columnNames = new ArrayList(partialFieldCount); + for (int i = 0; i < partialFieldCount; i++) { + columnNames.add(String.format("partial%d", i)); + partialPrimitiveObjectInspectorList.add( + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + primitiveTypeInfos[i])); + } + + return ObjectInspectorFactory.getStandardStructObjectInspector( + columnNames, primitiveObjectInspectorList); + } + + public void init(Random r) { + this.r = r; + chooseSchema(); + } + + /* + * For now, exclude CHAR until we determine why there is a difference (blank padding) + * serializing with LazyBinarySerializeWrite and the regular SerDe... + */ + private static String[] possibleHiveTypeNames = { + "boolean", + "tinyint", + "smallint", + "int", + "bigint", + "date", + "float", + "double", + "string", +// "char", + "varchar", + "binary", + "date", + "timestamp", + "interval_year_month", + "interval_day_time", + "decimal" + }; + + private void chooseSchema() { + HashSet hashSet = null; + boolean allTypes; + boolean onlyOne = (r.nextInt(100) == 7); + if (onlyOne) { + columnCount = 1; + allTypes = false; + } else { + allTypes = r.nextBoolean(); + if (allTypes) { + // One of each type. 
+ columnCount = possibleHiveTypeNames.length; + hashSet = new HashSet(); + } else { + columnCount = 1 + r.nextInt(20); + } + } + typeNames = new ArrayList(columnCount); + primitiveCategories = new PrimitiveCategory[columnCount]; + primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; + primitiveObjectInspectorList = new ArrayList(columnCount); + List columnNames = new ArrayList(columnCount); + for (int c = 0; c < columnCount; c++) { + columnNames.add(String.format("col%d", c)); + String typeName; + + if (onlyOne) { + typeName = possibleHiveTypeNames[r.nextInt(possibleHiveTypeNames.length)]; + } else { + int typeNum; + if (allTypes) { + while (true) { + typeNum = r.nextInt(possibleHiveTypeNames.length); + Integer typeNumInteger = new Integer(typeNum); + if (!hashSet.contains(typeNumInteger)) { + hashSet.add(typeNumInteger); + break; + } + } + } else { + typeNum = r.nextInt(possibleHiveTypeNames.length); + } + typeName = possibleHiveTypeNames[typeNum]; + } + if (typeName.equals("char")) { + int maxLength = 1 + r.nextInt(100); + typeName = String.format("char(%d)", maxLength); + } else if (typeName.equals("varchar")) { + int maxLength = 1 + r.nextInt(100); + typeName = String.format("varchar(%d)", maxLength); + } else if (typeName.equals("decimal")) { + typeName = String.format("decimal(%d,%d)", HiveDecimal.SYSTEM_DEFAULT_PRECISION, HiveDecimal.SYSTEM_DEFAULT_SCALE); + } + PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName); + primitiveTypeInfos[c] = primitiveTypeInfo; + PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory(); + primitiveCategories[c] = primitiveCategory; + primitiveObjectInspectorList.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo)); + typeNames.add(typeName); + } + rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList); + } + + public Object[][] randomRows(int n) { + Object[][] result = new Object[n][]; + for (int i = 0; i < n; i++) { + result[i] = randomRow(); + } + return result; + } + + public Object[] randomRow() { + Object row[] = new Object[columnCount]; + for (int c = 0; c < columnCount; c++) { + Object object = randomObject(c); + if (object == null) { + throw new Error("Unexpected null for column " + c); + } + row[c] = getWritableObject(c, object); + if (row[c] == null) { + throw new Error("Unexpected null for writable for column " + c); + } + } + return row; + } + + public static void sort(Object[][] rows, ObjectInspector oi) { + for (int i = 0; i < rows.length; i++) { + for (int j = i + 1; j < rows.length; j++) { + if (ObjectInspectorUtils.compare(rows[i], oi, rows[j], oi) > 0) { + Object[] t = rows[i]; + rows[i] = rows[j]; + rows[j] = t; + } + } + } + } + + public void sort(Object[][] rows) { + RandomRowObjectSource.sort(rows, rowStructObjectInspector); + } + + public Object getWritableObject(int column, Object object) { + ObjectInspector objectInspector = primitiveObjectInspectorList.get(column); + PrimitiveCategory primitiveCategory = primitiveCategories[column]; + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; + switch (primitiveCategory) { + case BOOLEAN: + return ((WritableBooleanObjectInspector) objectInspector).create((boolean) object); + case BYTE: + return ((WritableByteObjectInspector) objectInspector).create((byte) object); + case SHORT: + return ((WritableShortObjectInspector) objectInspector).create((short) object); + case INT: + return 
((WritableIntObjectInspector) objectInspector).create((int) object); + case LONG: + return ((WritableLongObjectInspector) objectInspector).create((long) object); + case DATE: + return ((WritableDateObjectInspector) objectInspector).create((Date) object); + case FLOAT: + return ((WritableFloatObjectInspector) objectInspector).create((float) object); + case DOUBLE: + return ((WritableDoubleObjectInspector) objectInspector).create((double) object); + case STRING: + return ((WritableStringObjectInspector) objectInspector).create((String) object); + case CHAR: + { + WritableHiveCharObjectInspector writableCharObjectInspector = + new WritableHiveCharObjectInspector( (CharTypeInfo) primitiveTypeInfo); + return writableCharObjectInspector.create(new HiveChar(StringUtils.EMPTY, -1)); + } + case VARCHAR: + { + WritableHiveVarcharObjectInspector writableVarcharObjectInspector = + new WritableHiveVarcharObjectInspector( (VarcharTypeInfo) primitiveTypeInfo); + return writableVarcharObjectInspector.create(new HiveVarchar(StringUtils.EMPTY, -1)); + } + case BINARY: + return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector.create(ArrayUtils.EMPTY_BYTE_ARRAY); + case TIMESTAMP: + return ((WritableTimestampObjectInspector) objectInspector).create(new Timestamp(0)); + case INTERVAL_YEAR_MONTH: + return ((WritableHiveIntervalYearMonthObjectInspector) objectInspector).create(new HiveIntervalYearMonth(0)); + case INTERVAL_DAY_TIME: + return ((WritableHiveIntervalDayTimeObjectInspector) objectInspector).create(new HiveIntervalDayTime(0, 0)); + case DECIMAL: + { + WritableHiveDecimalObjectInspector writableDecimalObjectInspector = + new WritableHiveDecimalObjectInspector((DecimalTypeInfo) primitiveTypeInfo); + return writableDecimalObjectInspector.create(HiveDecimal.ZERO); + } + default: + throw new Error("Unknown primitive category " + primitiveCategory); + } + } + + public Object randomObject(int column) { + PrimitiveCategory primitiveCategory = primitiveCategories[column]; + PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column]; + switch (primitiveCategory) { + case BOOLEAN: + return Boolean.valueOf(r.nextInt(1) == 1); + case BYTE: + return Byte.valueOf((byte) r.nextInt()); + case SHORT: + return Short.valueOf((short) r.nextInt()); + case INT: + return Integer.valueOf(r.nextInt()); + case LONG: + return Long.valueOf(r.nextLong()); + case DATE: + return RandomTypeUtil.getRandDate(r); + case FLOAT: + return Float.valueOf(r.nextFloat() * 10 - 5); + case DOUBLE: + return Double.valueOf(r.nextDouble() * 10 - 5); + case STRING: + return RandomTypeUtil.getRandString(r); + case CHAR: + return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo); + case VARCHAR: + return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo); + case BINARY: + return getRandBinary(r, 1 + r.nextInt(100)); + case TIMESTAMP: + return RandomTypeUtil.getRandTimestamp(r); + case INTERVAL_YEAR_MONTH: + return getRandIntervalYearMonth(r); + case INTERVAL_DAY_TIME: + return getRandIntervalDayTime(r); + case DECIMAL: + return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo); + default: + throw new Error("Unknown primitive category " + primitiveCategory); + } + } + + public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) { + int maxLength = 1 + r.nextInt(charTypeInfo.getLength()); + String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveChar hiveChar = new HiveChar(randomString, maxLength); + return hiveChar; + } + + public static HiveVarchar 
getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) { + int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength()); + String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100); + HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength); + return hiveVarchar; + } + + public static byte[] getRandBinary(Random r, int len){ + byte[] bytes = new byte[len]; + for (int j = 0; j < len; j++){ + bytes[j] = Byte.valueOf((byte) r.nextInt()); + } + return bytes; + } + + private static final String DECIMAL_CHARS = "0123456789"; + + public static HiveDecimal getRandHiveDecimal(Random r, DecimalTypeInfo decimalTypeInfo) { + while (true) { + StringBuilder sb = new StringBuilder(); + int precision = 1 + r.nextInt(18); + int scale = 0 + r.nextInt(precision + 1); + + int integerDigits = precision - scale; + + if (r.nextBoolean()) { + sb.append("-"); + } + + if (integerDigits == 0) { + sb.append("0"); + } else { + sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, integerDigits)); + } + if (scale != 0) { + sb.append("."); + sb.append(RandomTypeUtil.getRandString(r, DECIMAL_CHARS, scale)); + } + + HiveDecimal bd = HiveDecimal.create(sb.toString()); + if (bd.scale() > bd.precision()) { + // Sometimes weird decimals are produced? + continue; + } + + return bd; + } + } + + public static HiveIntervalYearMonth getRandIntervalYearMonth(Random r) { + String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-"; + String intervalYearMonthStr = String.format("%s%d-%d", + yearMonthSignStr, + Integer.valueOf(1800 + r.nextInt(500)), // year + Integer.valueOf(0 + r.nextInt(12))); // month + HiveIntervalYearMonth intervalYearMonthVal = HiveIntervalYearMonth.valueOf(intervalYearMonthStr); + return intervalYearMonthVal; + } + + public static HiveIntervalDayTime getRandIntervalDayTime(Random r) { + String optionalNanos = ""; + if (r.nextInt(2) == 1) { + optionalNanos = String.format(".%09d", + Integer.valueOf(0 + r.nextInt(DateUtils.NANOS_PER_SEC))); + } + String yearMonthSignStr = r.nextInt(2) == 0 ? "" : "-"; + String dayTimeStr = String.format("%s%d %02d:%02d:%02d%s", + yearMonthSignStr, + Integer.valueOf(1 + r.nextInt(28)), // day + Integer.valueOf(0 + r.nextInt(24)), // hour + Integer.valueOf(0 + r.nextInt(60)), // minute + Integer.valueOf(0 + r.nextInt(60)), // second + optionalNanos); + HiveIntervalDayTime intervalDayTimeVal = HiveIntervalDayTime.valueOf(dayTimeStr); + return intervalDayTimeVal; + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java index 4415431..bbb35c7 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java @@ -40,7 +40,7 @@ * * Reading some fields require a results object to receive value information. A separate * results object is created by the caller at initialization per different field even for the same - * type. + * type. * * Some type values are by reference to either bytes in the deserialization buffer or to * other type specific buffers. 
So, those references are only valid until the next time set is diff --git serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java index e27c6b1..52dd5a3 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java +++ serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java @@ -32,12 +32,27 @@ import org.apache.hadoop.hive.serde2.fast.DeserializeRead; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.fast.SerializeWrite; +import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable; +import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable; +import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.FloatWritable; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; /** * TestBinarySortableSerDe. @@ -45,27 +60,28 @@ */ public class VerifyFast { - public static void verifyDeserializeRead(DeserializeRead deserializeRead, PrimitiveTypeInfo primitiveTypeInfo, Object object) throws IOException { + public static void verifyDeserializeRead(DeserializeRead deserializeRead, + PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException { boolean isNull; isNull = deserializeRead.readCheckNull(); if (isNull) { - if (object != null) { + if (writable != null) { TestCase.fail("Field reports null but object is not null"); } return; - } else if (object == null) { + } else if (writable == null) { TestCase.fail("Field report not null but object is null"); } switch (primitiveTypeInfo.getPrimitiveCategory()) { case BOOLEAN: { boolean value = deserializeRead.currentBoolean; - if (!(object instanceof Boolean)) { - TestCase.fail("Boolean expected object not Boolean"); + if (!(writable instanceof BooleanWritable)) { + TestCase.fail("Boolean expected writable not Boolean"); } - Boolean expected = (Boolean) object; + boolean expected = ((BooleanWritable) writable).get(); if (value != expected) { TestCase.fail("Boolean field mismatch (expected " + expected + " found " + value + ")"); } @@ -74,10 +90,10 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit case BYTE: { byte value = deserializeRead.currentByte; - if (!(object instanceof Byte)) { - TestCase.fail("Byte expected object not Byte"); + if (!(writable instanceof ByteWritable)) { + TestCase.fail("Byte expected writable not Byte"); } - Byte expected = (Byte) object; + byte expected = ((ByteWritable) writable).get(); if (value != expected) { TestCase.fail("Byte field mismatch (expected " + (int) expected + " found " + (int) value + ")"); } @@ -86,10 +102,10 @@ public static 
void verifyDeserializeRead(DeserializeRead deserializeRead, Primit case SHORT: { short value = deserializeRead.currentShort; - if (!(object instanceof Short)) { - TestCase.fail("Short expected object not Short"); + if (!(writable instanceof ShortWritable)) { + TestCase.fail("Short expected writable not Short"); } - Short expected = (Short) object; + short expected = ((ShortWritable) writable).get(); if (value != expected) { TestCase.fail("Short field mismatch (expected " + expected + " found " + value + ")"); } @@ -98,10 +114,10 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit case INT: { int value = deserializeRead.currentInt; - if (!(object instanceof Integer)) { - TestCase.fail("Integer expected object not Integer"); + if (!(writable instanceof IntWritable)) { + TestCase.fail("Integer expected writable not Integer"); } - Integer expected = (Integer) object; + int expected = ((IntWritable) writable).get(); if (value != expected) { TestCase.fail("Int field mismatch (expected " + expected + " found " + value + ")"); } @@ -110,10 +126,10 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit case LONG: { long value = deserializeRead.currentLong; - if (!(object instanceof Long)) { - TestCase.fail("Long expected object not Long"); + if (!(writable instanceof LongWritable)) { + TestCase.fail("Long expected writable not Long"); } - Long expected = (Long) object; + Long expected = ((LongWritable) writable).get(); if (value != expected) { TestCase.fail("Long field mismatch (expected " + expected + " found " + value + ")"); } @@ -122,10 +138,10 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit case FLOAT: { float value = deserializeRead.currentFloat; - Float expected = (Float) object; - if (!(object instanceof Float)) { - TestCase.fail("Float expected object not Float"); + if (!(writable instanceof FloatWritable)) { + TestCase.fail("Float expected writable not Float"); } + float expected = ((FloatWritable) writable).get(); if (value != expected) { TestCase.fail("Float field mismatch (expected " + expected + " found " + value + ")"); } @@ -134,10 +150,10 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit case DOUBLE: { double value = deserializeRead.currentDouble; - Double expected = (Double) object; - if (!(object instanceof Double)) { - TestCase.fail("Double expected object not Double"); + if (!(writable instanceof DoubleWritable)) { + TestCase.fail("Double expected writable not Double"); } + double expected = ((DoubleWritable) writable).get(); if (value != expected) { TestCase.fail("Double field mismatch (expected " + expected + " found " + value + ")"); } @@ -151,7 +167,7 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit deserializeRead.currentBytesStart + deserializeRead.currentBytesLength); Text text = new Text(stringBytes); String string = text.toString(); - String expected = (String) object; + String expected = ((Text) writable).toString(); if (!string.equals(expected)) { TestCase.fail("String field mismatch (expected '" + expected + "' found '" + string + "')"); } @@ -168,7 +184,7 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit HiveChar hiveChar = new HiveChar(string, ((CharTypeInfo) primitiveTypeInfo).getLength()); - HiveChar expected = (HiveChar) object; + HiveChar expected = ((HiveCharWritable) writable).getHiveChar(); if (!hiveChar.equals(expected)) { TestCase.fail("Char field mismatch 
(expected '" + expected + "' found '" + hiveChar + "')"); } @@ -185,7 +201,7 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit HiveVarchar hiveVarchar = new HiveVarchar(string, ((VarcharTypeInfo) primitiveTypeInfo).getLength()); - HiveVarchar expected = (HiveVarchar) object; + HiveVarchar expected = ((HiveVarcharWritable) writable).getHiveVarchar(); if (!hiveVarchar.equals(expected)) { TestCase.fail("Varchar field mismatch (expected '" + expected + "' found '" + hiveVarchar + "')"); } @@ -197,7 +213,7 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit if (value == null) { TestCase.fail("Decimal field evaluated to NULL"); } - HiveDecimal expected = (HiveDecimal) object; + HiveDecimal expected = ((HiveDecimalWritable) writable).getHiveDecimal(); if (!value.equals(expected)) { DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) primitiveTypeInfo; int precision = decimalTypeInfo.getPrecision(); @@ -209,7 +225,7 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit case DATE: { Date value = deserializeRead.currentDateWritable.get(); - Date expected = (Date) object; + Date expected = ((DateWritable) writable).get(); if (!value.equals(expected)) { TestCase.fail("Date field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); } @@ -218,7 +234,7 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit case TIMESTAMP: { Timestamp value = deserializeRead.currentTimestampWritable.getTimestamp(); - Timestamp expected = (Timestamp) object; + Timestamp expected = ((TimestampWritable) writable).getTimestamp(); if (!value.equals(expected)) { TestCase.fail("Timestamp field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); } @@ -227,7 +243,7 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit case INTERVAL_YEAR_MONTH: { HiveIntervalYearMonth value = deserializeRead.currentHiveIntervalYearMonthWritable.getHiveIntervalYearMonth(); - HiveIntervalYearMonth expected = (HiveIntervalYearMonth) object; + HiveIntervalYearMonth expected = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth(); if (!value.equals(expected)) { TestCase.fail("HiveIntervalYearMonth field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); } @@ -236,7 +252,7 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit case INTERVAL_DAY_TIME: { HiveIntervalDayTime value = deserializeRead.currentHiveIntervalDayTimeWritable.getHiveIntervalDayTime(); - HiveIntervalDayTime expected = (HiveIntervalDayTime) object; + HiveIntervalDayTime expected = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime(); if (!value.equals(expected)) { TestCase.fail("HiveIntervalDayTime field mismatch (expected " + expected.toString() + " found " + value.toString() + ")"); } @@ -248,7 +264,8 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit deserializeRead.currentBytes, deserializeRead.currentBytesStart, deserializeRead.currentBytesStart + deserializeRead.currentBytesLength); - byte[] expected = (byte[]) object; + BytesWritable bytesWritable = (BytesWritable) writable; + byte[] expected = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength()); if (byteArray.length != expected.length){ TestCase.fail("Byte Array field mismatch (expected " + Arrays.toString(expected) + " found " + Arrays.toString(byteArray) + ")"); @@ -266,57 
+283,58 @@ public static void verifyDeserializeRead(DeserializeRead deserializeRead, Primit } } - public static void serializeWrite(SerializeWrite serializeWrite, PrimitiveTypeInfo primitiveTypeInfo, Object object) throws IOException { - if (object == null) { + public static void serializeWrite(SerializeWrite serializeWrite, + PrimitiveTypeInfo primitiveTypeInfo, Writable writable) throws IOException { + if (writable == null) { serializeWrite.writeNull(); return; } switch (primitiveTypeInfo.getPrimitiveCategory()) { case BOOLEAN: { - boolean value = (Boolean) object; + boolean value = ((BooleanWritable) writable).get(); serializeWrite.writeBoolean(value); } break; case BYTE: { - byte value = (Byte) object; + byte value = ((ByteWritable) writable).get(); serializeWrite.writeByte(value); } break; case SHORT: { - short value = (Short) object; + short value = ((ShortWritable) writable).get(); serializeWrite.writeShort(value); } break; case INT: { - int value = (Integer) object; + int value = ((IntWritable) writable).get(); serializeWrite.writeInt(value); } break; case LONG: { - long value = (Long) object; + long value = ((LongWritable) writable).get(); serializeWrite.writeLong(value); } break; case FLOAT: { - float value = (Float) object; + float value = ((FloatWritable) writable).get(); serializeWrite.writeFloat(value); } break; case DOUBLE: { - double value = (Double) object; + double value = ((DoubleWritable) writable).get(); serializeWrite.writeDouble(value); } break; case STRING: { - String value = (String) object; + Text value = (Text) writable; byte[] stringBytes = value.getBytes(); int stringLength = stringBytes.length; serializeWrite.writeString(stringBytes, 0, stringLength); @@ -324,51 +342,52 @@ public static void serializeWrite(SerializeWrite serializeWrite, PrimitiveTypeIn break; case CHAR: { - HiveChar value = (HiveChar) object; + HiveChar value = ((HiveCharWritable) writable).getHiveChar(); serializeWrite.writeHiveChar(value); } break; case VARCHAR: { - HiveVarchar value = (HiveVarchar) object; + HiveVarchar value = ((HiveVarcharWritable) writable).getHiveVarchar(); serializeWrite.writeHiveVarchar(value); } break; case DECIMAL: { - HiveDecimal value = (HiveDecimal) object; + HiveDecimal value = ((HiveDecimalWritable) writable).getHiveDecimal(); DecimalTypeInfo decTypeInfo = (DecimalTypeInfo)primitiveTypeInfo; serializeWrite.writeHiveDecimal(value, decTypeInfo.scale()); } break; case DATE: { - Date value = (Date) object; + Date value = ((DateWritable) writable).get(); serializeWrite.writeDate(value); } break; case TIMESTAMP: { - Timestamp value = (Timestamp) object; + Timestamp value = ((TimestampWritable) writable).getTimestamp(); serializeWrite.writeTimestamp(value); } break; case INTERVAL_YEAR_MONTH: { - HiveIntervalYearMonth value = (HiveIntervalYearMonth) object; + HiveIntervalYearMonth value = ((HiveIntervalYearMonthWritable) writable).getHiveIntervalYearMonth(); serializeWrite.writeHiveIntervalYearMonth(value); } break; case INTERVAL_DAY_TIME: { - HiveIntervalDayTime value = (HiveIntervalDayTime) object; + HiveIntervalDayTime value = ((HiveIntervalDayTimeWritable) writable).getHiveIntervalDayTime(); serializeWrite.writeHiveIntervalDayTime(value); } break; case BINARY: { - byte[] binaryBytes = (byte[]) object; - int length = binaryBytes.length; + BytesWritable byteWritable = (BytesWritable) writable; + byte[] binaryBytes = byteWritable.getBytes(); + int length = byteWritable.getLength(); serializeWrite.writeBinary(binaryBytes, 0, length); } break; diff --git 
serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java index 14fc38e..1349f74 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java @@ -23,6 +23,8 @@ import java.util.List; import java.util.Random; +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveIntervalDayTime; @@ -30,6 +32,28 @@ import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.RandomTypeUtil; import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalDayTimeObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveIntervalYearMonthObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; + +import com.sun.jdi.PrimitiveType; public class MyTestClass { @@ -202,4 +226,66 @@ public static Object getNonRandValue(Object[] nrArray, int index) { static Object[] nrIntervalDayTime = { HiveIntervalDayTime.valueOf("1 0:0:0") }; + + public static void nonRandomRowFill(Object[][] rows, PrimitiveCategory[] primitiveCategories) { + int minCount = Math.min(rows.length, nrDecimal.length); + for (int i = 0; i < minCount; i++) { + Object[] row = rows[i]; + for (int c = 0; c < primitiveCategories.length; c++) { + Object object = row[c]; // Current value. + switch (primitiveCategories[c]) { + case BOOLEAN: + // Use current for now. 
+ break; + case BYTE: + object = nrByte; + break; + case SHORT: + object = nrShort; + break; + case INT: + object = nrInt; + break; + case LONG: + object = nrLong; + break; + case DATE: + object = nrDate; + break; + case FLOAT: + object = nrFloat; + break; + case DOUBLE: + object = nrDouble; + break; + case STRING: + object = nrString; + break; + case CHAR: + // Use current for now. + break; + case VARCHAR: + // Use current for now. + break; + case BINARY: + // Use current for now. + break; + case TIMESTAMP: + // Use current for now. + break; + case INTERVAL_YEAR_MONTH: + object = nrIntervalYearMonth; + break; + case INTERVAL_DAY_TIME: + object = nrIntervalDayTime; + break; + case DECIMAL: + object = nrDecimal[i]; + break; + default: + throw new Error("Unknown primitive category " + primitiveCategories[c]); + } + } + } + } } diff --git serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java index 0be3213..58937db 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java @@ -17,53 +17,72 @@ */ package org.apache.hadoop.hive.serde2.binarysortable; +import java.io.EOFException; import java.util.Arrays; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Random; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.VerifyFast; -import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; +import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Writable; import junit.framework.TestCase; public class TestBinarySortableFast extends TestCase { - private void testBinarySortableFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, + private void testBinarySortableFast( + RandomRowObjectSource source, Object[][] rows, boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker, - SerDe serde, StructObjectInspector rowOI, boolean ascending, - Map primitiveTypeInfoMap) throws Throwable { + SerDe serde, StructObjectInspector rowOI, + SerDe serde_fewer, StructObjectInspector writeRowOI, + boolean ascending, PrimitiveTypeInfo[] primitiveTypeInfos, + boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable { + + int rowCount = rows.length; + int columnCount = primitiveTypeInfos.length; + + boolean[] columnsToInclude = null; + if (useIncludeColumns) { + columnsToInclude = new boolean[columnCount]; + for (int i = 0; i < columnCount; i++) { + columnsToInclude[i] = r.nextBoolean(); + } + } + + int writeColumnCount = columnCount; + if (doWriteFewerColumns) { + writeColumnCount = 
writeRowOI.getAllStructFieldRefs().size(); + } BinarySortableSerializeWrite binarySortableSerializeWrite = - new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker); + new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker); // Try to serialize // One Writable per row. - BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; - - int[][] perFieldWriteLengthsArray = new int[myTestPrimitiveClasses.length][]; - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + BytesWritable serializeWriteBytes[] = new BytesWritable[rowCount]; + + int[][] perFieldWriteLengthsArray = new int[rowCount][]; + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; Output output = new Output(); binarySortableSerializeWrite.set(output); - int[] perFieldWriteLengths = new int[MyTestPrimitiveClass.primitiveCount]; - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - VerifyFast.serializeWrite(binarySortableSerializeWrite, primitiveTypeInfoMap.get(t)[index], object); + int[] perFieldWriteLengths = new int[columnCount]; + for (int index = 0; index < writeColumnCount; index++) { + + Writable writable = (Writable) row[index]; + + VerifyFast.serializeWrite(binarySortableSerializeWrite, primitiveTypeInfos[index], writable); perFieldWriteLengths[index] = output.getLength(); } perFieldWriteLengthsArray[i] = perFieldWriteLengths; @@ -90,34 +109,87 @@ private void testBinarySortableFast(MyTestPrimitiveClass[] myTestPrimitiveClasse // Try to deserialize using DeserializeRead our Writable row objects created by SerializeWrite. - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t); - BinarySortableDeserializeRead binarySortableDeserializeRead = + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; + BinarySortableDeserializeRead binarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc); + if (useIncludeColumns) { + binarySortableDeserializeRead.setColumnsToInclude(columnsToInclude); + } + BytesWritable bytesWritable = serializeWriteBytes[i]; - binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + binarySortableDeserializeRead.set( + bytesWritable.getBytes(), 0, bytesWritable.getLength()); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], object); + for (int index = 0; index < columnCount; index++) { + if (index >= writeColumnCount || + (useIncludeColumns && !columnsToInclude[index])) { + // Should come back a null. 
+ VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], null); + } else { + Writable writable = (Writable) row[index]; + VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], writable); + } } binarySortableDeserializeRead.extraFieldsCheck(); TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned()); - TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned()); + if (doWriteFewerColumns) { + TestCase.assertTrue(binarySortableDeserializeRead.readBeyondBufferRangeWarned()); + } else { + TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned()); + } TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned()); + + /* + * Clip off one byte and expect to get an EOFException on the write field. + */ + BinarySortableDeserializeRead binarySortableDeserializeRead2 = + new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc); + + if (useIncludeColumns) { + binarySortableDeserializeRead2.setColumnsToInclude(columnsToInclude); + } + + binarySortableDeserializeRead2.set( + bytesWritable.getBytes(), 0, bytesWritable.getLength() - 1); // One fewer byte. + + for (int index = 0; index < writeColumnCount; index++) { + Writable writable = (Writable) row[index]; + if (index == writeColumnCount - 1) { + boolean threw = false; + try { + VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable); + } catch (EOFException e) { + threw = true; + } + TestCase.assertTrue(threw); + } else { + if (useIncludeColumns && !columnsToInclude[index]) { + VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], null); + } else { + VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead2, primitiveTypeInfos[index], writable); + } + } + } + } // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite. - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + for (int i = 0; i < rowCount; i++) { BytesWritable bytesWritable = serializeWriteBytes[i]; - List deserializedRow = (List) serde.deserialize(bytesWritable); - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]); + // Note that regular SerDe doesn't tolerate fewer columns. + List deserializedRow; + if (doWriteFewerColumns) { + deserializedRow = (List) serde_fewer.deserialize(bytesWritable); + } else { + deserializedRow = (List) serde.deserialize(bytesWritable); + } + + Object[] row = rows[i]; + for (int index = 0; index < writeColumnCount; index++) { + Object expected = row[index]; Object object = deserializedRow.get(index); if (expected == null || object == null) { if (expected != null || object != null) { @@ -132,14 +204,19 @@ private void testBinarySortableFast(MyTestPrimitiveClass[] myTestPrimitiveClasse } // One Writable per row. - BytesWritable serdeBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; - + BytesWritable serdeBytes[] = new BytesWritable[rowCount]; + // Serialize using the SerDe, then below deserialize using DeserializeRead. 
-    for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
-      MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
+    for (int i = 0; i < rowCount; i++) {
+      Object[] row = rows[i];
       // Since SerDe reuses memory, we will need to make a copy.
-      BytesWritable serialized = (BytesWritable) serde.serialize(t, rowOI);
+      BytesWritable serialized;
+      if (doWriteFewerColumns) {
+        serialized = (BytesWritable) serde_fewer.serialize(row, rowOI);
+      } else {
+        serialized = (BytesWritable) serde.serialize(row, rowOI);
+      }
       BytesWritable bytesWritable = new BytesWritable();
       bytesWritable.set(serialized);
       byte[] serDeOutput = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength());
@@ -167,85 +244,188 @@ private void testBinarySortableFast(MyTestPrimitiveClass[] myTestPrimitiveClasse
     }
     // Try to deserialize using DeserializeRead our Writable row objects created by SerDe.
-    for (int i = 0; i < myTestPrimitiveClasses.length; i++) {
-      MyTestPrimitiveClass t = myTestPrimitiveClasses[i];
-      PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfoMap.get(t);
-      BinarySortableDeserializeRead binarySortableDeserializeRead =
+    for (int i = 0; i < rowCount; i++) {
+      Object[] row = rows[i];
+      BinarySortableDeserializeRead binarySortableDeserializeRead =
           new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc);
+      if (useIncludeColumns) {
+        binarySortableDeserializeRead.setColumnsToInclude(columnsToInclude);
+      }
+
       BytesWritable bytesWritable = serdeBytes[i];
       binarySortableDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength());
-      for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) {
-        Object object = t.getPrimitiveObject(index);
-        VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], object);
+      for (int index = 0; index < columnCount; index++) {
+        if (index >= writeColumnCount ||
+            (useIncludeColumns && !columnsToInclude[index])) {
+          // Should come back a null.
+          VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], null);
+        } else {
+          Writable writable = (Writable) row[index];
+          VerifyFast.verifyDeserializeRead(binarySortableDeserializeRead, primitiveTypeInfos[index], writable);
+        }
       }
       binarySortableDeserializeRead.extraFieldsCheck();
       TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondConfiguredFieldsWarned());
-      TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned());
+      if (doWriteFewerColumns) {
+        TestCase.assertTrue(binarySortableDeserializeRead.readBeyondBufferRangeWarned());
+      } else {
+        TestCase.assertTrue(!binarySortableDeserializeRead.readBeyondBufferRangeWarned());
+      }
       TestCase.assertTrue(!binarySortableDeserializeRead.bufferRangeHasExtraDataWarned());
     }
   }
+  private void testBinarySortableFastCase(int caseNum, boolean doNonRandomFill, Random r)
+      throws Throwable {
+
+    RandomRowObjectSource source = new RandomRowObjectSource();
+    source.init(r);
+
+    int rowCount = 1000;
+    Object[][] rows = source.randomRows(rowCount);
+
+    if (doNonRandomFill) {
+      MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
+    }
+
+    // We need to operate on sorted data to fully test BinarySortable.
+    source.sort(rows);
+
+    StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
+
+    PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
+    int columnCount = primitiveTypeInfos.length;
+
+    int writeColumnCount = columnCount;
+    StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
+    boolean doWriteFewerColumns = r.nextBoolean();
+    if (doWriteFewerColumns) {
+      writeColumnCount = 1 + r.nextInt(columnCount);
+      if (writeColumnCount == columnCount) {
+        doWriteFewerColumns = false;
+      } else {
+        writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
+      }
+    }
+
+    String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
+    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
+    String order;
+    order = StringUtils.leftPad("", columnCount, '+');
+    String nullOrder;
+    nullOrder = StringUtils.leftPad("", columnCount, 'a');
+    SerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder);
+
+    SerDe serde_ascending_fewer = null;
+    if (doWriteFewerColumns) {
+      String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
+      String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
+
+      serde_ascending_fewer = TestBinarySortableSerDe.getSerDe(partialFieldNames, partialFieldTypes, order, nullOrder);
+    }
+
+    order = StringUtils.leftPad("", columnCount, '-');
+    nullOrder = StringUtils.leftPad("", columnCount, 'z');
+    SerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder);
+
+    SerDe serde_descending_fewer = null;
+    if (doWriteFewerColumns) {
+      String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
+      String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
+
+      serde_descending_fewer = TestBinarySortableSerDe.getSerDe(partialFieldNames, partialFieldTypes, order, nullOrder);
+    }
+
+    boolean[] columnSortOrderIsDesc = new boolean[columnCount];
+    Arrays.fill(columnSortOrderIsDesc, false);
+    byte[] columnNullMarker = new byte[columnCount];
+    Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO);
+    byte[] columnNotNullMarker = new byte[columnCount];
+    Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE);
+
+    /*
+     * Ascending.
+ */ + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_ascending, rowStructObjectInspector, + serde_ascending_fewer, writeRowStructObjectInspector, + /* ascending */ true, primitiveTypeInfos, + /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r); + + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_ascending, rowStructObjectInspector, + serde_ascending_fewer, writeRowStructObjectInspector, + /* ascending */ true, primitiveTypeInfos, + /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r); + + if (doWriteFewerColumns) { + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_ascending, rowStructObjectInspector, + serde_ascending_fewer, writeRowStructObjectInspector, + /* ascending */ true, primitiveTypeInfos, + /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r); + + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_ascending, rowStructObjectInspector, + serde_ascending_fewer, writeRowStructObjectInspector, + /* ascending */ true, primitiveTypeInfos, + /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r); + } + + /* + * Descending. + */ + Arrays.fill(columnSortOrderIsDesc, true); + + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_descending, rowStructObjectInspector, + serde_ascending_fewer, writeRowStructObjectInspector, + /* ascending */ false, primitiveTypeInfos, + /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r); + + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_descending, rowStructObjectInspector, + serde_ascending_fewer, writeRowStructObjectInspector, + /* ascending */ false, primitiveTypeInfos, + /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r); + + if (doWriteFewerColumns) { + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_descending, rowStructObjectInspector, + serde_descending_fewer, writeRowStructObjectInspector, + /* ascending */ false, primitiveTypeInfos, + /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r); + + testBinarySortableFast(source, rows, + columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker, + serde_descending, rowStructObjectInspector, + serde_descending_fewer, writeRowStructObjectInspector, + /* ascending */ false, primitiveTypeInfos, + /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r); + } + + } + public void testBinarySortableFast() throws Throwable { + try { + Random r = new Random(35790); + + int caseNum = 0; + for (int i = 0; i < 10; i++) { + testBinarySortableFastCase(caseNum, (i % 2 == 0), r); + caseNum++; + } - int num = 1000; - Random r = new Random(1234); - MyTestPrimitiveClass myTestPrimitiveClasses[] = new MyTestPrimitiveClass[num]; - // Need a map because we sort. 
- Map primitiveTypeInfoMap = new HashMap(); - - int i; - // First try non-random values - for (i = 0; i < MyTestClass.nrDecimal.length; i++) { - MyTestPrimitiveClass t = new MyTestPrimitiveClass(); - ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); - t.nonRandomFill(i, extraTypeInfo); - myTestPrimitiveClasses[i] = t; - PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo); - primitiveTypeInfoMap.put(t, primitiveTypeInfos); - } - - for ( ; i < num; i++) { - int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount); - MyTestPrimitiveClass t = new MyTestPrimitiveClass(); - int field = 0; - ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); - t.randomFill(r, randField, field, extraTypeInfo); - myTestPrimitiveClasses[i] = t; - PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo); - primitiveTypeInfoMap.put(t, primitiveTypeInfos); - } - - StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory - .getReflectionObjectInspector(MyTestPrimitiveClass.class, - ObjectInspectorOptions.JAVA); - - TestBinarySortableSerDe.sort(myTestPrimitiveClasses, rowOI); - - String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); - String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); - String order; - order = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, '+'); - String nullOrder; - nullOrder = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, 'a'); - SerDe serde_ascending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder); - order = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, '-'); - nullOrder = StringUtils.leftPad("", MyTestPrimitiveClass.primitiveCount, 'z'); - SerDe serde_descending = TestBinarySortableSerDe.getSerDe(fieldNames, fieldTypes, order, nullOrder); - - boolean[] columnSortOrderIsDesc = new boolean[MyTestPrimitiveClass.primitiveCount]; - Arrays.fill(columnSortOrderIsDesc, false); - byte[] columnNullMarker = new byte[MyTestPrimitiveClass.primitiveCount]; - Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO); - byte[] columnNotNullMarker = new byte[MyTestPrimitiveClass.primitiveCount]; - Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE); - testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, columnNullMarker, - columnNotNullMarker, serde_ascending, rowOI, true, primitiveTypeInfoMap); - Arrays.fill(columnSortOrderIsDesc, true); - testBinarySortableFast(myTestPrimitiveClasses, columnSortOrderIsDesc, columnNullMarker, - columnNotNullMarker, serde_descending, rowOI, false, primitiveTypeInfoMap); } catch (Throwable e) { e.printStackTrace(); throw e; diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java index 7ebe7ae..76b93c6 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java +++ serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.serde2.lazy; +import java.io.EOFException; import java.util.Arrays; import java.util.Properties; import java.util.Random; @@ -24,11 +25,15 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import 
org.apache.hadoop.hive.serde2.VerifyFast; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource; import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead; import org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -37,31 +42,55 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; import junit.framework.TestCase; public class TestLazySimpleFast extends TestCase { - private void testLazySimpleFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, LazySimpleSerDe[] serdes, - StructObjectInspector[] rowOIs, byte separator, LazySerDeParameters[] serdeParams, - PrimitiveTypeInfo[][] primitiveTypeInfosArray) throws Throwable { + private void testLazySimpleFast( + RandomRowObjectSource source, Object[][] rows, + LazySimpleSerDe serde, StructObjectInspector rowOI, + LazySimpleSerDe serde_fewer, StructObjectInspector writeRowOI, + byte separator, LazySerDeParameters serdeParams, LazySerDeParameters serdeParams_fewer, + PrimitiveTypeInfo[] primitiveTypeInfos, + boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable { + + int rowCount = rows.length; + int columnCount = primitiveTypeInfos.length; + + boolean[] columnsToInclude = null; + if (useIncludeColumns) { + columnsToInclude = new boolean[columnCount]; + for (int i = 0; i < columnCount; i++) { + columnsToInclude[i] = r.nextBoolean(); + } + } + int writeColumnCount = columnCount; + PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos; + if (doWriteFewerColumns) { + writeColumnCount = writeRowOI.getAllStructFieldRefs().size(); + writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount); + } // Try to serialize - BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + BytesWritable serializeWriteBytes[] = new BytesWritable[rowCount]; + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; Output output = new Output(); - LazySimpleSerializeWrite lazySimpleSerializeWrite = - new LazySimpleSerializeWrite(MyTestPrimitiveClass.primitiveCount, - separator, serdeParams[i]); + LazySimpleSerializeWrite lazySimpleSerializeWrite = + new LazySimpleSerializeWrite(columnCount, + separator, serdeParams); lazySimpleSerializeWrite.set(output); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfosArray[i][index], object); + for (int index = 0; index < columnCount; index++) { + + Writable writable = (Writable) row[index]; + + VerifyFast.serializeWrite(lazySimpleSerializeWrite, primitiveTypeInfos[index], writable); } BytesWritable bytesWritable = new BytesWritable(); @@ -70,12 +99,15 @@ private void testLazySimpleFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, L } // Try to deserialize - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - 
MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; - LazySimpleDeserializeRead lazySimpleDeserializeRead = - new LazySimpleDeserializeRead(primitiveTypeInfos, - separator, serdeParams[i]); + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; + LazySimpleDeserializeRead lazySimpleDeserializeRead = + new LazySimpleDeserializeRead(writePrimitiveTypeInfos, + separator, serdeParams); + + if (useIncludeColumns) { + lazySimpleDeserializeRead.setColumnsToInclude(columnsToInclude); + } BytesWritable bytesWritable = serializeWriteBytes[i]; byte[] bytes = bytesWritable.getBytes(); @@ -87,28 +119,37 @@ private void testLazySimpleFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, L chars[c] = (char) (bytes[c] & 0xFF); } - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index); - VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], object); + for (int index = 0; index < columnCount; index++) { + if (index >= writeColumnCount || + (useIncludeColumns && !columnsToInclude[index])) { + // Should come back a null. + VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null); + } else { + Writable writable = (Writable) row[index]; + VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable); + } } lazySimpleDeserializeRead.extraFieldsCheck(); TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned()); - TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + if (doWriteFewerColumns) { + TestCase.assertTrue(lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + } else { + TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + } TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned()); + } // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite. 
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + for (int i = 0; i < rowCount; i++) { BytesWritable bytesWritable = serializeWriteBytes[i]; - LazyStruct lazySimpleStruct = (LazyStruct) serdes[i].deserialize(bytesWritable); + LazyStruct lazySimpleStruct = (LazyStruct) serde.deserialize(bytesWritable); - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + Object[] row = rows[i]; - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + for (int index = 0; index < columnCount; index++) { PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index]; - Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfo); + Writable writable = (Writable) row[index]; LazyPrimitive lazyPrimitive = (LazyPrimitive) lazySimpleStruct.getField(index); Object object; if (lazyPrimitive != null) { @@ -116,12 +157,12 @@ private void testLazySimpleFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, L } else { object = null; } - if (expected == null || object == null) { - if (expected != null || object != null) { + if (writable == null || object == null) { + if (writable != null || object != null) { fail("SerDe deserialized NULL column mismatch"); } } else { - if (!object.equals(expected)) { + if (!object.equals(writable)) { fail("SerDe deserialized value does not match"); } } @@ -129,21 +170,19 @@ private void testLazySimpleFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, L } // One Writable per row. - byte[][] serdeBytes = new byte[myTestPrimitiveClasses.length][]; - + byte[][] serdeBytes = new byte[rowCount][]; + // Serialize using the SerDe, then below deserialize using DeserializeRead. - Object[] row = new Object[MyTestPrimitiveClass.primitiveCount]; - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + Object[] serdeRow = new Object[columnCount]; + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; // LazySimple seems to work better with an row object array instead of a Java object... - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]); - row[index] = object; + for (int index = 0; index < columnCount; index++) { + serdeRow[index] = row[index]; } - Text serialized = (Text) serdes[i].serialize(row, rowOIs[i]); + Text serialized = (Text) serde.serialize(serdeRow, rowOI); byte[] bytes1 = Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength()); byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength()); @@ -154,23 +193,37 @@ private void testLazySimpleFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, L } // Try to deserialize using DeserializeRead our Writable row objects created by SerDe. 
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; - LazySimpleDeserializeRead lazySimpleDeserializeRead = - new LazySimpleDeserializeRead(primitiveTypeInfos, - separator, serdeParams[i]); + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; + + LazySimpleDeserializeRead lazySimpleDeserializeRead = + new LazySimpleDeserializeRead(writePrimitiveTypeInfos, + separator, serdeParams); + + if (useIncludeColumns) { + lazySimpleDeserializeRead.setColumnsToInclude(columnsToInclude); + } byte[] bytes = serdeBytes[i]; lazySimpleDeserializeRead.set(bytes, 0, bytes.length); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], object); + for (int index = 0; index < columnCount; index++) { + if (index >= writeColumnCount || + (useIncludeColumns && !columnsToInclude[index])) { + // Should come back a null. + VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], null); + } else { + Writable writable = (Writable) row[index]; + VerifyFast.verifyDeserializeRead(lazySimpleDeserializeRead, primitiveTypeInfos[index], writable); + } } lazySimpleDeserializeRead.extraFieldsCheck(); TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondConfiguredFieldsWarned()); - TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + if (doWriteFewerColumns) { + TestCase.assertTrue(lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + } else { + TestCase.assertTrue(!lazySimpleDeserializeRead.readBeyondBufferRangeWarned()); + } TestCase.assertTrue(!lazySimpleDeserializeRead.bufferRangeHasExtraDataWarned()); } } @@ -186,7 +239,7 @@ private Properties createProperties(String fieldNames, String fieldTypes) { // Set the configuration parameters tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); - + tbl.setProperty("columns", fieldNames); tbl.setProperty("columns.types", fieldTypes); @@ -210,44 +263,95 @@ private LazySerDeParameters getSerDeParams(String fieldNames, String fieldTypes) return new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName()); } - public void testLazySimpleFast() throws Throwable { - try { + public void testLazySimpleFastCase(int caseNum, boolean doNonRandomFill, Random r) + throws Throwable { + + RandomRowObjectSource source = new RandomRowObjectSource(); + source.init(r); - int num = 1000; - Random r = new Random(1234); - MyTestPrimitiveClass[] rows = new MyTestPrimitiveClass[num]; - PrimitiveTypeInfo[][] primitiveTypeInfosArray = new PrimitiveTypeInfo[num][]; - for (int i = 0; i < num; i++) { - int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount); - MyTestPrimitiveClass t = new MyTestPrimitiveClass(); - int field = 0; - ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo(); - t.randomFill(r, randField, field, extraTypeInfo); - PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo); - rows[i] = t; - primitiveTypeInfosArray[i] = primitiveTypeInfos; + int rowCount = 1000; + Object[][] rows = source.randomRows(rowCount); + + if (doNonRandomFill) { + MyTestClass.nonRandomRowFill(rows, source.primitiveCategories()); + } + + StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector(); + + PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos(); + 
int columnCount = primitiveTypeInfos.length; + + int writeColumnCount = columnCount; + StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector; + boolean doWriteFewerColumns = r.nextBoolean(); + if (doWriteFewerColumns) { + writeColumnCount = 1 + r.nextInt(columnCount); + if (writeColumnCount == columnCount) { + doWriteFewerColumns = false; + } else { + writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount); } + } - // To get the specific type information for CHAR and VARCHAR, seems like we need an - // inspector and SerDe per row... - StructObjectInspector[] rowOIs = new StructObjectInspector[num]; - LazySimpleSerDe[] serdes = new LazySimpleSerDe[num]; - LazySerDeParameters[] serdeParams = new LazySerDeParameters[num]; - for (int i = 0; i < num; i++) { - MyTestPrimitiveClass t = rows[i]; + String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector); + String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector); - StructObjectInspector rowOI = t.getRowInspector(primitiveTypeInfosArray[i]); + LazySimpleSerDe serde = getSerDe(fieldNames, fieldTypes); + LazySerDeParameters serdeParams = getSerDeParams(fieldNames, fieldTypes); - String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); - String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); + LazySimpleSerDe serde_fewer = null; + LazySerDeParameters serdeParams_fewer = null; + if (doWriteFewerColumns) { + String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector); + String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector); + + serde_fewer = getSerDe(fieldNames, fieldTypes); + serdeParams_fewer = getSerDeParams(partialFieldNames, partialFieldTypes); + } + + byte separator = (byte) '\t'; + testLazySimpleFast( + source, rows, + serde, rowStructObjectInspector, + serde_fewer, writeRowStructObjectInspector, + separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, + /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r); + + testLazySimpleFast( + source, rows, + serde, rowStructObjectInspector, + serde_fewer, writeRowStructObjectInspector, + separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, + /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r); + + if (doWriteFewerColumns) { + testLazySimpleFast( + source, rows, + serde, rowStructObjectInspector, + serde_fewer, writeRowStructObjectInspector, + separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, + /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r); + + testLazySimpleFast( + source, rows, + serde, rowStructObjectInspector, + serde_fewer, writeRowStructObjectInspector, + separator, serdeParams, serdeParams_fewer, primitiveTypeInfos, + /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r); + } + } + + public void testLazySimpleFast() throws Throwable { + + try { + Random r = new Random(35790); - rowOIs[i] = rowOI; - serdes[i] = getSerDe(fieldNames, fieldTypes); - serdeParams[i] = getSerDeParams(fieldNames, fieldTypes); + int caseNum = 0; + for (int i = 0; i < 10; i++) { + testLazySimpleFastCase(caseNum, (i % 2 == 0), r); + caseNum++; } - byte separator = (byte) '\t'; - testLazySimpleFast(rows, serdes, rowOIs, separator, serdeParams, primitiveTypeInfosArray); } catch (Throwable e) { e.printStackTrace(); throw e; diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java 
serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java index 4032743..d7c4999 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java +++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.serde2.lazybinary; +import java.io.EOFException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -27,9 +28,13 @@ import org.apache.hadoop.hive.serde2.ByteStream.Output; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.VerifyFast; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestPrimitiveClass.ExtraTypeInfo; import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.fast.RandomRowObjectSource; +import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -43,24 +48,50 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.Writable; public class TestLazyBinaryFast extends TestCase { - private void testLazyBinaryFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, SerDe[] serdes, StructObjectInspector[] rowOIs, - PrimitiveTypeInfo[][] primitiveTypeInfosArray) throws Throwable { + private void testLazyBinaryFast( + RandomRowObjectSource source, Object[][] rows, + SerDe serde, StructObjectInspector rowOI, + SerDe serde_fewer, StructObjectInspector writeRowOI, + PrimitiveTypeInfo[] primitiveTypeInfos, + boolean useIncludeColumns, boolean doWriteFewerColumns, Random r) throws Throwable { - LazyBinarySerializeWrite lazyBinarySerializeWrite = new LazyBinarySerializeWrite(MyTestPrimitiveClass.primitiveCount); + int rowCount = rows.length; + int columnCount = primitiveTypeInfos.length; + + boolean[] columnsToInclude = null; + if (useIncludeColumns) { + columnsToInclude = new boolean[columnCount]; + for (int i = 0; i < columnCount; i++) { + columnsToInclude[i] = r.nextBoolean(); + } + } + + int writeColumnCount = columnCount; + PrimitiveTypeInfo[] writePrimitiveTypeInfos = primitiveTypeInfos; + if (doWriteFewerColumns) { + writeColumnCount = writeRowOI.getAllStructFieldRefs().size(); + writePrimitiveTypeInfos = Arrays.copyOf(primitiveTypeInfos, writeColumnCount); + } + + LazyBinarySerializeWrite lazyBinarySerializeWrite = + new LazyBinarySerializeWrite(writeColumnCount); // Try to serialize - BytesWritable serializeWriteBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; + BytesWritable serializeWriteBytes[] = new BytesWritable[rowCount]; + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; Output output = new Output(); lazyBinarySerializeWrite.set(output); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - VerifyFast.serializeWrite(lazyBinarySerializeWrite, 
primitiveTypeInfosArray[i][index], object); + for (int index = 0; index < writeColumnCount; index++) { + + Writable writable = (Writable) row[index]; + + VerifyFast.serializeWrite(lazyBinarySerializeWrite, primitiveTypeInfos[index], writable); } BytesWritable bytesWritable = new BytesWritable(); @@ -69,44 +100,63 @@ private void testLazyBinaryFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, S } // Try to deserialize - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; - LazyBinaryDeserializeRead lazyBinaryDeserializeRead = - new LazyBinaryDeserializeRead(primitiveTypeInfos); + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; + + // Specifying the right type info length tells LazyBinaryDeserializeRead which is the last + // column. + LazyBinaryDeserializeRead lazyBinaryDeserializeRead = + new LazyBinaryDeserializeRead(writePrimitiveTypeInfos); + + if (useIncludeColumns) { + lazyBinaryDeserializeRead.setColumnsToInclude(columnsToInclude); + } BytesWritable bytesWritable = serializeWriteBytes[i]; lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - PrimitiveCategory primitiveCategory = t.getPrimitiveCategory(index); - VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], object); + for (int index = 0; index < columnCount; index++) { + if (index >= writeColumnCount || + (useIncludeColumns && !columnsToInclude[index])) { + // Should come back a null. + VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null); + } else { + Writable writable = (Writable) row[index]; + VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable); + } } lazyBinaryDeserializeRead.extraFieldsCheck(); - TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned()); + if (doWriteFewerColumns) { + TestCase.assertTrue(lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned()); + } else { + TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned()); + } TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned()); TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned()); } // Try to deserialize using SerDe class our Writable row objects created by SerializeWrite. 
- for (int i = 0; i < myTestPrimitiveClasses.length; i++) { + for (int i = 0; i < rowCount; i++) { BytesWritable bytesWritable = serializeWriteBytes[i]; - LazyBinaryStruct lazyBinaryStruct = (LazyBinaryStruct) serdes[i].deserialize(bytesWritable); + LazyBinaryStruct lazyBinaryStruct; + if (doWriteFewerColumns) { + lazyBinaryStruct = (LazyBinaryStruct) serde_fewer.deserialize(bytesWritable); + } else { + lazyBinaryStruct = (LazyBinaryStruct) serde.deserialize(bytesWritable); + } - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + Object[] row = rows[i]; - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { + for (int index = 0; index < writeColumnCount; index++) { PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[index]; - Object expected = t.getPrimitiveWritableObject(index, primitiveTypeInfo); + Writable writable = (Writable) row[index]; Object object = lazyBinaryStruct.getField(index); - if (expected == null || object == null) { - if (expected != null || object != null) { + if (writable == null || object == null) { + if (writable != null || object != null) { fail("SerDe deserialized NULL column mismatch"); } } else { - if (!object.equals(expected)) { + if (!object.equals(writable)) { fail("SerDe deserialized value does not match"); } } @@ -114,88 +164,167 @@ private void testLazyBinaryFast(MyTestPrimitiveClass[] myTestPrimitiveClasses, S } // One Writable per row. - BytesWritable serdeBytes[] = new BytesWritable[myTestPrimitiveClasses.length]; - + BytesWritable serdeBytes[] = new BytesWritable[rowCount]; + // Serialize using the SerDe, then below deserialize using DeserializeRead. - Object[] row = new Object[MyTestPrimitiveClass.primitiveCount]; - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; + Object[] serdeRow = new Object[writeColumnCount]; + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; // LazyBinary seems to work better with an row object array instead of a Java object... 
- for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveWritableObject(index, primitiveTypeInfos[index]); - row[index] = object; + for (int index = 0; index < writeColumnCount; index++) { + serdeRow[index] = row[index]; } - BytesWritable serialized = (BytesWritable) serdes[i].serialize(row, rowOIs[i]); - BytesWritable bytesWritable = new BytesWritable(); - bytesWritable.set(serialized); - byte[] bytes1 = Arrays.copyOfRange(bytesWritable.getBytes(), 0, bytesWritable.getLength()); + BytesWritable serialized; + if (doWriteFewerColumns) { + serialized = (BytesWritable) serde_fewer.serialize(serdeRow, writeRowOI); + } else { + serialized = (BytesWritable) serde.serialize(serdeRow, rowOI); + } + + BytesWritable bytesWritable = + new BytesWritable( + Arrays.copyOfRange(serialized.getBytes(), 0, serialized.getLength())); + byte[] bytes1 = bytesWritable.getBytes(); - byte[] bytes2 = Arrays.copyOfRange(serializeWriteBytes[i].getBytes(), 0, serializeWriteBytes[i].getLength()); + BytesWritable lazySerializedWriteBytes = serializeWriteBytes[i]; + byte[] bytes2 = Arrays.copyOfRange(lazySerializedWriteBytes.getBytes(), 0, lazySerializedWriteBytes.getLength()); + if (bytes1.length != bytes2.length) { + fail("SerializeWrite length " + bytes2.length + " and " + + "SerDe serialization length " + bytes1.length + + " do not match (" + Arrays.toString(primitiveTypeInfos) + ")"); + } if (!Arrays.equals(bytes1, bytes2)) { - fail("SerializeWrite and SerDe serialization does not match"); + fail("SerializeWrite and SerDe serialization does not match (" + Arrays.toString(primitiveTypeInfos) + ")"); } serdeBytes[i] = bytesWritable; } // Try to deserialize using DeserializeRead our Writable row objects created by SerDe. - for (int i = 0; i < myTestPrimitiveClasses.length; i++) { - MyTestPrimitiveClass t = myTestPrimitiveClasses[i]; - PrimitiveTypeInfo[] primitiveTypeInfos = primitiveTypeInfosArray[i]; - LazyBinaryDeserializeRead lazyBinaryDeserializeRead = + for (int i = 0; i < rowCount; i++) { + Object[] row = rows[i]; + + // When doWriteFewerColumns, try to read more fields than exist in buffer. + LazyBinaryDeserializeRead lazyBinaryDeserializeRead = new LazyBinaryDeserializeRead(primitiveTypeInfos); + if (useIncludeColumns) { + lazyBinaryDeserializeRead.setColumnsToInclude(columnsToInclude); + } + BytesWritable bytesWritable = serdeBytes[i]; lazyBinaryDeserializeRead.set(bytesWritable.getBytes(), 0, bytesWritable.getLength()); - for (int index = 0; index < MyTestPrimitiveClass.primitiveCount; index++) { - Object object = t.getPrimitiveObject(index); - VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], object); + for (int index = 0; index < columnCount; index++) { + if (index >= writeColumnCount || + (useIncludeColumns && !columnsToInclude[index])) { + // Should come back a null. + VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], null); + } else { + Writable writable = (Writable) row[index]; + VerifyFast.verifyDeserializeRead(lazyBinaryDeserializeRead, primitiveTypeInfos[index], writable); + } } lazyBinaryDeserializeRead.extraFieldsCheck(); TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondConfiguredFieldsWarned()); - TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned()); + if (doWriteFewerColumns) { + // The nullByte may cause this to not be true... 
+        // TestCase.assertTrue(lazyBinaryDeserializeRead.readBeyondBufferRangeWarned());
+      } else {
+        TestCase.assertTrue(!lazyBinaryDeserializeRead.readBeyondBufferRangeWarned());
+      }
       TestCase.assertTrue(!lazyBinaryDeserializeRead.bufferRangeHasExtraDataWarned());
     }
   }
-  public void testLazyBinaryFast() throws Throwable {
-    try {
+  public void testLazyBinaryFastCase(int caseNum, boolean doNonRandomFill, Random r) throws Throwable {
+
+    RandomRowObjectSource source = new RandomRowObjectSource();
+    source.init(r);
+
+    int rowCount = 1000;
+    Object[][] rows = source.randomRows(rowCount);
+
+    if (doNonRandomFill) {
+      MyTestClass.nonRandomRowFill(rows, source.primitiveCategories());
+    }
+
+    StructObjectInspector rowStructObjectInspector = source.rowStructObjectInspector();
+
+    PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
+    int columnCount = primitiveTypeInfos.length;
-      int num = 1000;
-      Random r = new Random(1234);
-      MyTestPrimitiveClass[] rows = new MyTestPrimitiveClass[num];
-      PrimitiveTypeInfo[][] primitiveTypeInfosArray = new PrimitiveTypeInfo[num][];
-      for (int i = 0; i < num; i++) {
-        int randField = r.nextInt(MyTestPrimitiveClass.primitiveCount);
-        MyTestPrimitiveClass t = new MyTestPrimitiveClass();
-        int field = 0;
-        ExtraTypeInfo extraTypeInfo = new ExtraTypeInfo();
-        t.randomFill(r, randField, field, extraTypeInfo);
-        PrimitiveTypeInfo[] primitiveTypeInfos = MyTestPrimitiveClass.getPrimitiveTypeInfos(extraTypeInfo);
-        rows[i] = t;
-        primitiveTypeInfosArray[i] = primitiveTypeInfos;
+    int writeColumnCount = columnCount;
+    StructObjectInspector writeRowStructObjectInspector = rowStructObjectInspector;
+    boolean doWriteFewerColumns = r.nextBoolean();
+    if (doWriteFewerColumns) {
+      writeColumnCount = 1 + r.nextInt(columnCount);
+      if (writeColumnCount == columnCount) {
+        doWriteFewerColumns = false;
+      } else {
+        writeRowStructObjectInspector = source.partialRowStructObjectInspector(writeColumnCount);
       }
+    }
+
+    String fieldNames = ObjectInspectorUtils.getFieldNames(rowStructObjectInspector);
+    String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowStructObjectInspector);
+
+    SerDe serde = TestLazyBinarySerDe.getSerDe(fieldNames, fieldTypes);
+
+    SerDe serde_fewer = null;
+    if (doWriteFewerColumns) {
+      String partialFieldNames = ObjectInspectorUtils.getFieldNames(writeRowStructObjectInspector);
+      String partialFieldTypes = ObjectInspectorUtils.getFieldTypes(writeRowStructObjectInspector);
+
+      serde_fewer = TestLazyBinarySerDe.getSerDe(partialFieldNames, partialFieldTypes);
+    }
-      // To get the specific type information for CHAR and VARCHAR, seems like we need an
-      // inspector and SerDe per row...
-      StructObjectInspector[] rowOIs = new StructObjectInspector[num];
-      SerDe[] serdes = new SerDe[num];
-      for (int i = 0; i < num; i++) {
-        MyTestPrimitiveClass t = rows[i];
+    testLazyBinaryFast(
+        source, rows,
+        serde, rowStructObjectInspector,
+        serde_fewer, writeRowStructObjectInspector,
+        primitiveTypeInfos,
+        /* useIncludeColumns */ false, /* doWriteFewerColumns */ false, r);
-        StructObjectInspector rowOI = t.getRowInspector(primitiveTypeInfosArray[i]);
+    testLazyBinaryFast(
+        source, rows,
+        serde, rowStructObjectInspector,
+        serde_fewer, writeRowStructObjectInspector,
+        primitiveTypeInfos,
+        /* useIncludeColumns */ true, /* doWriteFewerColumns */ false, r);
-        String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
-        String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
+    /*
+     * Can the LazyBinary format really tolerate writing fewer columns?
+ */ + // if (doWriteFewerColumns) { + // testLazyBinaryFast( + // source, rows, + // serde, rowStructObjectInspector, + // serde_fewer, writeRowStructObjectInspector, + // primitiveTypeInfos, + // /* useIncludeColumns */ false, /* doWriteFewerColumns */ true, r); + + // testLazyBinaryFast( + // source, rows, + // serde, rowStructObjectInspector, + // serde_fewer, writeRowStructObjectInspector, + // primitiveTypeInfos, + // /* useIncludeColumns */ true, /* doWriteFewerColumns */ true, r); + // } + } + + public void testLazyBinaryFast() throws Throwable { + + try { + Random r = new Random(35790); - rowOIs[i] = rowOI; - serdes[i] = TestLazyBinarySerDe.getSerDe(fieldNames, fieldTypes); + int caseNum = 0; + for (int i = 0; i < 10; i++) { + testLazyBinaryFastCase(caseNum, (i % 2 == 0), r); + caseNum++; } - testLazyBinaryFast(rows, serdes, rowOIs, primitiveTypeInfosArray); } catch (Throwable e) { e.printStackTrace(); throw e; diff --git storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java index 3fb0cfd..53a7823 100644 --- storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java +++ storage-api/src/java/org/apache/hadoop/hive/common/type/RandomTypeUtil.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.common.type; +import java.sql.Date; import java.sql.Timestamp; import java.text.DateFormat; import java.text.ParseException; @@ -26,6 +27,100 @@ public class RandomTypeUtil { + public static String getRandString(Random r) { + return getRandString(r, null, r.nextInt(10)); + } + + public static String getRandString(Random r, String characters, int length) { + if (characters == null) { + characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + + } + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < length; i++) { + if (characters == null) { + sb.append((char) (r.nextInt(128))); + } else { + sb.append(characters.charAt(r.nextInt(characters.length()))); + } + } + return sb.toString(); + } + + public static byte[] getRandBinary(Random r, int len){ + byte[] bytes = new byte[len]; + for (int j = 0; j < len; j++){ + bytes[j] = Byte.valueOf((byte) r.nextInt()); + } + return bytes; + } + + private static final String DECIMAL_CHARS = "0123456789"; + + public static class HiveDecimalAndPrecisionScale { + public HiveDecimal hiveDecimal; + public int precision; + public int scale; + + HiveDecimalAndPrecisionScale(HiveDecimal hiveDecimal, int precision, int scale) { + this.hiveDecimal = hiveDecimal; + this.precision = precision; + this.scale = scale; + } + } + + public static HiveDecimalAndPrecisionScale getRandHiveDecimal(Random r) { + int precision; + int scale; + while (true) { + StringBuilder sb = new StringBuilder(); + precision = 1 + r.nextInt(18); + scale = 0 + r.nextInt(precision + 1); + + int integerDigits = precision - scale; + + if (r.nextBoolean()) { + sb.append("-"); + } + + if (integerDigits == 0) { + sb.append("0"); + } else { + sb.append(getRandString(r, DECIMAL_CHARS, integerDigits)); + } + if (scale != 0) { + sb.append("."); + sb.append(getRandString(r, DECIMAL_CHARS, scale)); + } + + HiveDecimal bd = HiveDecimal.create(sb.toString()); + precision = bd.precision(); + scale = bd.scale(); + if (scale > precision) { + // Sometimes weird decimals are produced? + continue; + } + + // For now, punt. 
+ precision = HiveDecimal.SYSTEM_DEFAULT_PRECISION; + scale = HiveDecimal.SYSTEM_DEFAULT_SCALE; + return new HiveDecimalAndPrecisionScale(bd, precision, scale); + } + } + + public static Date getRandDate(Random r) { + String dateStr = String.format("%d-%02d-%02d", + Integer.valueOf(1800 + r.nextInt(500)), // year + Integer.valueOf(1 + r.nextInt(12)), // month + Integer.valueOf(1 + r.nextInt(28))); // day + Date dateVal = Date.valueOf(dateStr); + return dateVal; + } + + /** + * TIMESTAMP. + */ + public static final long NANOSECONDS_PER_SECOND = TimeUnit.SECONDS.toNanos(1); public static final long MILLISECONDS_PER_SECOND = TimeUnit.SECONDS.toMillis(1); public static final long NANOSECONDS_PER_MILLISSECOND = TimeUnit.MILLISECONDS.toNanos(1);
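Note: the following is a minimal, hypothetical driver (not part of this patch) that exercises the RandomTypeUtil helpers introduced above on their own; the class name RandomTypeUtilDemo and the fixed seed are illustrative assumptions, and only methods added in this diff are called.

    import java.sql.Date;
    import java.util.Random;

    import org.apache.hadoop.hive.common.type.RandomTypeUtil;
    import org.apache.hadoop.hive.common.type.RandomTypeUtil.HiveDecimalAndPrecisionScale;

    // Hypothetical smoke test for the new random-value generators (illustrative only).
    public class RandomTypeUtilDemo {
      public static void main(String[] args) {
        Random r = new Random(35790);  // fixed seed, mirroring the style of the tests above

        String str = RandomTypeUtil.getRandString(r);                      // 0-9 chars from A-Z
        byte[] bin = RandomTypeUtil.getRandBinary(r, 1 + r.nextInt(10));   // random binary payload
        HiveDecimalAndPrecisionScale dec = RandomTypeUtil.getRandHiveDecimal(r);
        Date date = RandomTypeUtil.getRandDate(r);                         // year in [1800, 2299]

        System.out.println("string=" + str
            + " binary.length=" + bin.length
            + " decimal=" + dec.hiveDecimal
            + " (precision=" + dec.precision + ", scale=" + dec.scale + ")"
            + " date=" + date);
      }
    }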