diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java index 84c2a5e..8cb1658 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java @@ -18,8 +18,10 @@ package org.apache.hadoop.hive.serde2.lazy; import java.util.Arrays; +import java.util.HashSet; import java.util.LinkedHashMap; import java.util.Map; +import java.util.Set; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -46,6 +48,11 @@ int mapSize = 0; /** + * Number of distinct keys recorded by parse(); unlike mapSize, duplicate keys count once + */ + int actualMapSize = 0; + + /** * The beginning position of key[i]. Only valid when the data is parsed. Note * that keyStart[mapSize] = begin + length + 1; that makes sure we can use the * same formula to compute the length of each value in the map. @@ -140,6 +147,7 @@ private void parse() { int keyValueSeparatorPosition = -1; int elementByteEnd = start; byte[] bytes = this.bytes.getData(); + Set keySet = new HashSet(); // Go through all bytes in the byte[] while (elementByteEnd <= arrayByteEnd) { @@ -160,6 +168,12 @@ private void parse() { mapSize++; elementByteBegin = elementByteEnd + 1; elementByteEnd++; + LazyPrimitive lazyKey = uncheckedGetKey(mapSize - 1); + if (lazyKey == null) { + continue; + } + Object key = lazyKey.getObject(); + keySet.add(key); } else { // Is this the first keyValueSeparator in this entry? 
if (keyValueSeparatorPosition == -1 @@ -181,9 +195,10 @@ private void parse() { keyStart[mapSize] = arrayByteEnd + 1; if (mapSize > 0) { - Arrays.fill(keyInited, 0, mapSize, false); Arrays.fill(valueInited, 0, mapSize, false); } + + actualMapSize = keySet.size(); } /** @@ -329,7 +344,7 @@ public int getMapSize() { if (!parsed) { parse(); } - return mapSize; + return actualMapSize; } protected boolean getParsed() { diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java index e8889c2..3e596ee 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java +++ serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java @@ -159,6 +159,7 @@ public void testLazyMap() throws Throwable { assertEquals("{2:'def',-1:null,0:'0',8:'abc'}".replace('\'', '\"'), SerDeUtils.getJSONString(b, oi)); + assertEquals(4, b.getMapSize()); } { @@ -184,6 +185,7 @@ public void testLazyMap() throws Throwable { assertEquals("{'2':'d\\tf','-1':null,'0':'0','8':'abc'}" .replace('\'', '\"'), SerDeUtils.getJSONString(b, oi)); + assertEquals(4, b.getMapSize()); } } catch (Throwable e) { @@ -220,6 +222,7 @@ public void testLazyMapWithDuplicateKeys() throws Throwable { assertEquals("{2:'def',-1:null,0:'0'}".replace('\'', '\"'), SerDeUtils.getJSONString(b, oi)); + assertEquals(3, b.getMapSize()); assertEquals(3, b.getMap().size()); } @@ -244,6 +247,7 @@ public void testLazyMapWithDuplicateKeys() throws Throwable { assertEquals("{'2':'d\\tf','-1':null,'0':'0'}" .replace('\'', '\"'), SerDeUtils.getJSONString(b, oi)); + assertEquals(3, b.getMapSize()); assertEquals(3, b.getMap().size()); } @@ -305,6 +309,7 @@ public void testLazyStruct() throws Throwable { SerDeUtils.getJSONString(o, oi)); data = new Text(": : : :"); + o = (LazyStruct) LazyFactory.createLazyObject(oi); TestLazyPrimitive.initLazyObject(o, data.getBytes(), 0, data .getLength()); 
assertEquals("{'a':null,'b':['',''],'c':{'':null},'d':':'}"