diff --git ql/src/test/results/clientpositive/convert_enum_to_string.q.out ql/src/test/results/clientpositive/convert_enum_to_string.q.out index 6cf1931..eb4813f 100644 --- ql/src/test/results/clientpositive/convert_enum_to_string.q.out +++ ql/src/test/results/clientpositive/convert_enum_to_string.q.out @@ -38,7 +38,7 @@ my_enum_structlist_map map> my_stringlist array from deserializer my_structlist array> from deserializer my_enumlist array from deserializer -my_stringset struct<> from deserializer -my_enumset struct<> from deserializer -my_structset struct<> from deserializer +my_stringset set from deserializer +my_enumset set from deserializer +my_structset set> from deserializer b string diff --git serde/if/serde.thrift serde/if/serde.thrift index e40c697..e785e98 100644 --- serde/if/serde.thrift +++ serde/if/serde.thrift @@ -56,6 +56,7 @@ const string TIMESTAMP_TYPE_NAME = "timestamp"; const string BINARY_TYPE_NAME = "binary"; const string LIST_TYPE_NAME = "array"; +const string SET_TYPE_NAME = "set"; const string MAP_TYPE_NAME = "map"; const string STRUCT_TYPE_NAME = "struct"; const string UNION_TYPE_NAME = "uniontype"; @@ -64,6 +65,6 @@ const string LIST_COLUMNS = "columns"; const string LIST_COLUMN_TYPES = "columns.types"; const set PrimitiveTypes = [ VOID_TYPE_NAME BOOLEAN_TYPE_NAME TINYINT_TYPE_NAME SMALLINT_TYPE_NAME INT_TYPE_NAME BIGINT_TYPE_NAME FLOAT_TYPE_NAME DOUBLE_TYPE_NAME STRING_TYPE_NAME DATE_TYPE_NAME DATETIME_TYPE_NAME TIMESTAMP_TYPE_NAME BINARY_TYPE_NAME], -const set CollectionTypes = [ LIST_TYPE_NAME MAP_TYPE_NAME ], +const set CollectionTypes = [ LIST_TYPE_NAME SET_TYPE_NAME MAP_TYPE_NAME ], diff --git serde/src/gen/thrift/gen-cpp/serde_constants.cpp serde/src/gen/thrift/gen-cpp/serde_constants.cpp index 350521d..cc5cf3d 100644 --- serde/src/gen/thrift/gen-cpp/serde_constants.cpp +++ serde/src/gen/thrift/gen-cpp/serde_constants.cpp @@ -66,6 +66,8 @@ serdeConstants::serdeConstants() { LIST_TYPE_NAME = "array"; + SET_TYPE_NAME = "set"; + 
MAP_TYPE_NAME = "map"; STRUCT_TYPE_NAME = "struct"; @@ -91,6 +93,7 @@ serdeConstants::serdeConstants() { PrimitiveTypes.insert("binary"); CollectionTypes.insert("array"); + CollectionTypes.insert("set"); CollectionTypes.insert("map"); } diff --git serde/src/gen/thrift/gen-cpp/serde_constants.h serde/src/gen/thrift/gen-cpp/serde_constants.h index 94015c2..1ac6448 100644 --- serde/src/gen/thrift/gen-cpp/serde_constants.h +++ serde/src/gen/thrift/gen-cpp/serde_constants.h @@ -42,6 +42,7 @@ class serdeConstants { std::string TIMESTAMP_TYPE_NAME; std::string BINARY_TYPE_NAME; std::string LIST_TYPE_NAME; + std::string SET_TYPE_NAME; std::string MAP_TYPE_NAME; std::string STRUCT_TYPE_NAME; std::string UNION_TYPE_NAME; diff --git serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/Constants.java serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/Constants.java index 2c32999..7a2df3d 100644 --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/Constants.java +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/Constants.java @@ -79,6 +79,8 @@ public class Constants { public static final String LIST_TYPE_NAME = "array"; + public static final String SET_TYPE_NAME = "set"; + public static final String MAP_TYPE_NAME = "map"; public static final String STRUCT_TYPE_NAME = "struct"; @@ -109,6 +111,7 @@ public class Constants { public static final Set CollectionTypes = new HashSet(); static { CollectionTypes.add("array"); + CollectionTypes.add("set"); CollectionTypes.add("map"); } diff --git serde/src/gen/thrift/gen-php/serde/serde_constants.php serde/src/gen/thrift/gen-php/serde/serde_constants.php index 99bda06..446e25f 100644 --- serde/src/gen/thrift/gen-php/serde/serde_constants.php +++ serde/src/gen/thrift/gen-php/serde/serde_constants.php @@ -64,6 +64,8 @@ $GLOBALS['serde_CONSTANTS']['BINARY_TYPE_NAME'] = "binary"; $GLOBALS['serde_CONSTANTS']['LIST_TYPE_NAME'] = "array"; +$GLOBALS['serde_CONSTANTS']['SET_TYPE_NAME'] = "set"; + 
$GLOBALS['serde_CONSTANTS']['MAP_TYPE_NAME'] = "map"; $GLOBALS['serde_CONSTANTS']['STRUCT_TYPE_NAME'] = "struct"; @@ -92,6 +94,7 @@ $GLOBALS['serde_CONSTANTS']['PrimitiveTypes'] = array( $GLOBALS['serde_CONSTANTS']['CollectionTypes'] = array( "array" => true, + "set" => true, "map" => true, ); diff --git serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py index b9e01c1..0218b46 100644 --- serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py +++ serde/src/gen/thrift/gen-py/org_apache_hadoop_hive_serde/constants.py @@ -35,6 +35,7 @@ DATETIME_TYPE_NAME = "datetime" TIMESTAMP_TYPE_NAME = "timestamp" BINARY_TYPE_NAME = "binary" LIST_TYPE_NAME = "array" +SET_TYPE_NAME = "set" MAP_TYPE_NAME = "map" STRUCT_TYPE_NAME = "struct" UNION_TYPE_NAME = "uniontype" @@ -57,5 +58,6 @@ PrimitiveTypes = set([ ]) CollectionTypes = set([ "array", + "set", "map", ]) diff --git serde/src/gen/thrift/gen-rb/serde_constants.rb serde/src/gen/thrift/gen-rb/serde_constants.rb index 7ef2fc9..b4af012 100644 --- serde/src/gen/thrift/gen-rb/serde_constants.rb +++ serde/src/gen/thrift/gen-rb/serde_constants.rb @@ -62,6 +62,8 @@ BINARY_TYPE_NAME = %q"binary" LIST_TYPE_NAME = %q"array" +SET_TYPE_NAME = %q"set" + MAP_TYPE_NAME = %q"map" STRUCT_TYPE_NAME = %q"struct" @@ -90,6 +92,7 @@ PrimitiveTypes = Set.new([ CollectionTypes = Set.new([ %q"array", + %q"set", %q"map", ]) diff --git serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java index e906a3f..0237156 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java @@ -21,6 +21,7 @@ package org.apache.hadoop.hive.serde2; import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import org.apache.commons.logging.Log; @@ 
-30,6 +31,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.SetObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -305,6 +307,28 @@ public final class SerDeUtils { } break; } + case SET: { + SetObjectInspector soi = (SetObjectInspector) oi; + ObjectInspector setElementObjectInspector = soi + .getSetElementObjectInspector(); + Set oset = soi.getSet(o); + if (oset == null) { + sb.append("null"); + } else { + sb.append(LBRACKET); + boolean first = true; + for (Object element : oset) { + if (first) { + first = false; + } else { + sb.append(COMMA); + } + buildJSONString(sb, element, setElementObjectInspector); + } + sb.append(RBRACKET); + } + break; + } case MAP: { MapObjectInspector moi = (MapObjectInspector) oi; ObjectInspector mapKeyObjectInspector = moi.getMapKeyObjectInspector(); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java index 20758a7..5b02f56 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java @@ -24,10 +24,11 @@ import java.util.List; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyListObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySetObjectInspector; import 
org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector; -import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBooleanObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBooleanObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyByteObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyFloatObjectInspector; @@ -45,13 +46,14 @@ import org.apache.hadoop.hive.serde2.lazydio.LazyDioInteger; import org.apache.hadoop.hive.serde2.lazydio.LazyDioLong; import org.apache.hadoop.hive.serde2.lazydio.LazyDioShort; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.SetTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; @@ -154,6 +156,8 @@ public final class LazyFactory { return new LazyMap((LazyMapObjectInspector) oi); case LIST: return new 
LazyArray((LazyListObjectInspector) oi);
+    case SET:
+      return new LazySet((LazySetObjectInspector) oi);
     case STRUCT:
       return new LazyStruct((LazySimpleStructObjectInspector) oi);
     case UNION:
@@ -222,6 +226,12 @@ public final class LazyFactory {
               .getListElementTypeInfo(), separator, separatorIndex + 1,
               nullSequence, escaped, escapeChar), separator[separatorIndex],
           nullSequence, escaped, escapeChar);
+    case SET:
+      return LazyObjectInspectorFactory.getLazySimpleSetObjectInspector(
+          createLazyObjectInspector(((SetTypeInfo) typeInfo)
+              .getSetElementTypeInfo(), separator, separatorIndex + 1,
+              nullSequence, escaped, escapeChar), separator[separatorIndex],
+          nullSequence, escaped, escapeChar);
     case STRUCT:
       StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
       List fieldNames = structTypeInfo.getAllStructFieldNames();
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySet.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySet.java
new file mode 100644
index 0000000..4f80ee0
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySet.java
@@ -0,0 +1,230 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazy;
+
+import java.util.Arrays;
+import java.util.LinkedHashSet;
+import java.util.Set;
+
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySetObjectInspector;
+import org.apache.hadoop.io.Text;
+
+/**
+ * LazySet stores a set of LazyObjects.
+ *
+ * LazySet does not deal with the case of a NULL set. That is handled by the
+ * parent LazyObject.
+ */
+public class LazySet extends LazyNonPrimitive {
+
+  /**
+   * Whether the data is already parsed or not.
+   */
+  boolean parsed = false;
+
+  /**
+   * The number of items found by parse(). Only valid when the data is parsed.
+   * NOTE(review): counts every delimited item; getSet() may hold fewer - confirm.
+   */
+  int setSize = 0;
+
+  /**
+   * The beginning position of elements[i]. Only valid when the data is parsed. Note
+   * that valueStart[setSize] = start + length + 1; that makes sure we can use the
+   * same formula to compute the length of each value in the set.
+   */
+  int[] valueStart;
+
+  /**
+   * The elements are stored in an array of LazyObjects. elements[index] will start
+   * from valueStart[index], and ends at valueStart[index+1] - 1.
+   */
+  LazyObject[] elements;
+
+  /**
+   * Whether init() is called on elements[i].
+   */
+  boolean[] elementInited;
+
+  /**
+   * cachedSet is reused for different calls to getSet(). But each LazySet has a
+   * separate cachedSet so we won't overwrite the data by accident.
+   */
+  protected LinkedHashSet cachedSet;
+
+  /**
+   * Construct a LazySet object with the ObjectInspector.
+   */
+  protected LazySet(LazySetObjectInspector oi) {
+    super(oi);
+  }
+
+  /**
+   * Set the row data for this LazySet.
+   *
+   * @see LazyObject#init(ByteArrayRef, int, int)
+   */
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    super.init(bytes, start, length);
+    parsed = false;
+    cachedSet = null;
+  }
+
+  /**
+   * Enlarge the size of arrays storing information for the elements inside the
+   * set.
+   */
+  protected void enlargeArrays() {
+    if (valueStart == null) {
+      int initialSize = 2;
+      valueStart = new int[initialSize];
+      elements = new LazyObject[initialSize];
+      elementInited = new boolean[initialSize];
+    } else {
+      valueStart = Arrays.copyOf(valueStart, valueStart.length * 2);
+      elements = Arrays.copyOf(elements, elements.length * 2);
+      elementInited = Arrays.copyOf(elementInited, elementInited.length * 2);
+    }
+  }
+
+  /**
+   * Parse the byte[] and fill valueStart
+   */
+  private void parse() {
+    parsed = true;
+
+    byte itemSeparator = oi.getItemSeparator();
+    boolean isEscaped = oi.isEscaped();
+    byte escapeChar = oi.getEscapeChar();
+
+    // empty set?
+    if (length == 0) {
+      setSize = 0;
+      return;
+    }
+
+    setSize = 0;
+    int arrayByteEnd = start + length;
+    int elementByteBegin = start;
+    int elementByteEnd = start;
+    byte[] bytes = this.bytes.getData();
+
+    // Go through all bytes in the byte[]
+    while (elementByteEnd <= arrayByteEnd) {
+      // End of entry reached?
+      if (elementByteEnd == arrayByteEnd
+          || bytes[elementByteEnd] == itemSeparator) {
+        // Bookkeeping arrays full? (one spare slot is kept for valueStart[setSize])
+        if (valueStart == null || setSize + 1 == valueStart.length) {
+          enlargeArrays();
+        }
+        valueStart[setSize] = elementByteBegin;
+        setSize++;
+        elementByteBegin = elementByteEnd + 1;
+        elementByteEnd++;
+      } else {
+        if (isEscaped && bytes[elementByteEnd] == escapeChar
+            && elementByteEnd + 1 < arrayByteEnd) {
+          // ignore the char after escape_char
+          elementByteEnd += 2;
+        } else {
+          elementByteEnd++;
+        }
+      }
+    }
+
+    // This makes sure we can use the same formula to compute the
+    // length of each value in the set.
+    valueStart[setSize] = arrayByteEnd + 1;
+
+    if (setSize > 0) {
+      Arrays.fill(elementInited, 0, setSize, false);
+    }
+  }
+
+  /**
+   * Get the element with the index without checking parsed.
+   *
+   * @param index
+   *          The index into the parsed items starting from 0
+   */
+  private LazyObject uncheckedGetElement(int index) {
+    if (elementInited[index]) {
+      return elements[index];
+    }
+    elementInited[index] = true;
+    Text nullSequence = oi.getNullSequence();
+    int valueIBegin = valueStart[index];
+    int valueILength = valueStart[index + 1] - 1 - valueIBegin;
+
+    if ((valueILength == nullSequence.getLength())
+        && (0 == LazyUtils.compare(bytes.getData(), valueIBegin, valueILength,
+            nullSequence.getBytes(), 0, nullSequence.getLength()))) {
+      return elements[index] = null;
+    }
+    elements[index] = LazyFactory
+        .createLazyObject(oi.getSetElementObjectInspector());
+    elements[index].init(bytes, valueIBegin, valueILength);
+    return elements[index];
+  }
+
+  /**
+   * Return the Set object representing this LazySet.
+   *
+   * @return the set object
+   */
+  public Set getSet() {
+    if (!parsed) {
+      parse();
+    }
+    if (cachedSet != null) {
+      return cachedSet;
+    }
+    // Use LinkedHashSet to provide deterministic order
+    cachedSet = new LinkedHashSet();
+
+    // go through each element of the set
+    for (int i = 0; i < setSize; i++) {
+      LazyObject lazyElement = uncheckedGetElement(i);
+      Object element = (lazyElement == null ? null : lazyElement.getObject());
+      cachedSet.add(element);
+    }
+    return cachedSet;
+  }
+
+  /**
+   * Get the number of items parse() found in the bytes of this LazySet.
+   *
+   * @return the item count (setSize); this method itself never returns -1.
+   */
+  public int getSetSize() {
+    if (!parsed) {
+      parse();
+    }
+    return setSize;
+  }
+
+  protected boolean getParsed() {
+    return parsed;
+  }
+
+  protected void setParsed(boolean parsed) {
+    this.parsed = parsed;
+  }
+}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
index 0036a8e..a932f0d 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
@@ -23,6 +23,7 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
+import java.util.Set;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -35,11 +36,12 @@ import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.SetObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -423,12 +425,13 @@ public class LazySimpleSerDe implements SerDe {
     char separator;
     List list;
     switch (objInspector.getCategory()) {
-    case PRIMITIVE:
+    case PRIMITIVE: {
LazyUtils.writePrimitiveUTF8(out, obj, (PrimitiveObjectInspector) objInspector, escaped, escapeChar, needsEscape); return; - case LIST: + } + case LIST: { separator = (char) separators[level]; ListObjectInspector loi = (ListObjectInspector) objInspector; list = loi.getList(obj); @@ -445,7 +448,29 @@ public class LazySimpleSerDe implements SerDe { } } return; - case MAP: + } + case SET: { + separator = (char) separators[level]; + SetObjectInspector soi = (SetObjectInspector) objInspector; + Set set = soi.getSet(obj); + ObjectInspector eoi = soi.getSetElementObjectInspector(); + if (set == null) { + out.write(nullSequence.getBytes(), 0, nullSequence.getLength()); + } else { + boolean first = true; + for (Object element : set) { + if (first) { + first = false; + } else { + out.write(separator); + } + serialize(out, element, eoi, separators, level + 1, nullSequence, + escaped, escapeChar, needsEscape); + } + } + return; + } + case MAP: { separator = (char) separators[level]; char keyValueSeparator = (char) separators[level + 1]; MapObjectInspector moi = (MapObjectInspector) objInspector; @@ -470,7 +495,8 @@ public class LazySimpleSerDe implements SerDe { } } return; - case STRUCT: + } + case STRUCT: { separator = (char) separators[level]; StructObjectInspector soi = (StructObjectInspector) objInspector; List fields = soi.getAllStructFieldRefs(); @@ -488,7 +514,8 @@ public class LazySimpleSerDe implements SerDe { } } return; - case UNION: + } + case UNION: { separator = (char) separators[level]; UnionObjectInspector uoi = (UnionObjectInspector) objInspector; List ois = uoi.getObjectInspectors(); @@ -504,6 +531,7 @@ public class LazySimpleSerDe implements SerDe { needsEscape); } return; + } default: break; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java index 8fee0fc..5f149a2 100644 --- 
serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
@@ -99,6 +99,27 @@ public final class LazyObjectInspectorFactory {
     return result;
   }
 
+  static HashMap, LazySetObjectInspector> cachedLazySimpleSetObjectInspector = new HashMap, LazySetObjectInspector>();
+
+  public static LazySetObjectInspector getLazySimpleSetObjectInspector(
+      ObjectInspector setElementObjectInspector,
+      byte itemSeparator, Text nullSequence, boolean escaped, byte escapeChar) {
+    ArrayList signature = new ArrayList();
+    signature.add(setElementObjectInspector);
+    signature.add(Byte.valueOf(itemSeparator));
+    signature.add(nullSequence.toString());
+    signature.add(Boolean.valueOf(escaped));
+    signature.add(Byte.valueOf(escapeChar));
+    LazySetObjectInspector result = cachedLazySimpleSetObjectInspector
+        .get(signature);
+    if (result == null) {
+      result = new LazySetObjectInspector(setElementObjectInspector,
+          itemSeparator, nullSequence, escaped, escapeChar);
+      cachedLazySimpleSetObjectInspector.put(signature, result);
+    }
+    return result;
+  }
+
   static HashMap, LazyMapObjectInspector> cachedLazySimpleMapObjectInspector = new HashMap, LazyMapObjectInspector>();
 
   public static LazyMapObjectInspector getLazySimpleMapObjectInspector(
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySetObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySetObjectInspector.java
new file mode 100644
index 0000000..fe0e03c
--- /dev/null
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySetObjectInspector.java
@@ -0,0 +1,109 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.lazy.objectinspector;
+
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.lazy.LazySet;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.SetObjectInspector;
+import org.apache.hadoop.io.Text;
+
+/**
+ * LazySetObjectInspector works on data stored in LazySet.
+ *
+ * Always use LazyObjectInspectorFactory.getLazySimpleSetObjectInspector to obtain
+ * one, instead of directly creating an instance of this class.
+ */
+public class LazySetObjectInspector implements SetObjectInspector {
+
+  public static final Log LOG = LogFactory.getLog(LazySetObjectInspector.class
+      .getName());
+
+  ObjectInspector elementObjectInspector;
+
+  byte itemSeparator;
+  Text nullSequence;
+  boolean escaped;
+  byte escapeChar;
+
+  /**
+   * Call LazyObjectInspectorFactory.getLazySimpleSetObjectInspector instead.
+   */
+  protected LazySetObjectInspector(ObjectInspector elementObjectInspector,
+      byte itemSeparator, Text nullSequence, boolean escaped,
+      byte escapeChar) {
+    this.elementObjectInspector = elementObjectInspector;
+    this.itemSeparator = itemSeparator;
+    this.nullSequence = nullSequence;
+    this.escaped = escaped;
+    this.escapeChar = escapeChar;
+  }
+
+  // Overriding ObjectInspector: category is SET, type name is "set<element type>"
+  @Override
+  public final Category getCategory() {
+    return Category.SET;
+  }
+
+  @Override
+  public String getTypeName() {
+    return org.apache.hadoop.hive.serde.Constants.SET_TYPE_NAME + "<"
+        + elementObjectInspector.getTypeName()
+        + ">";
+  }
+
+  // Overriding SetObjectInspector: null data gives -1 / null, anything else is cast to LazySet
+  public ObjectInspector getSetElementObjectInspector() {
+    return elementObjectInspector;
+  }
+
+  public int getSetSize(Object data) {
+    if (data == null) {
+      return -1;
+    }
+    return ((LazySet) data).getSetSize();
+  }
+
+  public Set getSet(Object data) {
+    if (data == null) {
+      return null;
+    }
+    return ((LazySet) data).getSet();
+  }
+
+  // Called by LazySet: the separator, null marker and escaping used to split the bytes
+  public byte getItemSeparator() {
+    return itemSeparator;
+  }
+
+  public Text getNullSequence() {
+    return nullSequence;
+  }
+
+  public boolean isEscaped() {
+    return escaped;
+  }
+
+  public byte getEscapeChar() {
+    return escapeChar;
+  }
+}
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java
index d90560b..4b41f8f 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java
@@ -42,7 +42,7 @@ public interface ObjectInspector extends Cloneable {
    *
    */
   public static enum Category {
-    PRIMITIVE, LIST, MAP, STRUCT, UNION
+    PRIMITIVE, LIST, SET, MAP, STRUCT, UNION
   };
 
   /**
diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
index 7537e99..985a45a 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorConverters.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.serde2.objectinspector;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorConverter;
@@ -129,6 +130,9 @@ public final class ObjectInspectorConverters {
     case LIST:
       return new ListConverter((ListObjectInspector) inputOI,
           (SettableListObjectInspector) outputOI);
+    case SET:
+      return new SetConverter((SetObjectInspector) inputOI,
+          (SettableSetObjectInspector) outputOI);
     case MAP:
       return new MapConverter((MapObjectInspector) inputOI,
           (SettableMapObjectInspector) outputOI);
@@ -193,6 +197,73 @@ public final class ObjectInspectorConverters {
   }
 
   /**
+   * A converter class for Set.
+   *
+   * Each convert() call fills a freshly created output set, so elements
+   * converted for an earlier input never leak into a later result.
+   */
+  public static class SetConverter implements Converter {
+
+    SetObjectInspector inputOI;
+    SettableSetObjectInspector outputOI;
+
+    ObjectInspector inputElementOI;
+    ObjectInspector outputElementOI;
+
+    ArrayList<Converter> elementConverters;
+
+    Object output;
+
+    public SetConverter(SetObjectInspector inputOI,
+        SettableSetObjectInspector outputOI) {
+      this.inputOI = inputOI;
+      this.outputOI = outputOI;
+      inputElementOI = inputOI.getSetElementObjectInspector();
+      outputElementOI = outputOI.getSetElementObjectInspector();
+      output = outputOI.create();
+      elementConverters = new ArrayList<Converter>();
+    }
+
+    /**
+     * Converts the input set element by element.
+     *
+     * @param input the set to convert, readable through inputOI
+     * @return a set built through outputOI, or null when the input is null
+     */
+    @Override
+    public Object convert(Object input) {
+      if (input == null) {
+        return null;
+      }
+      Set<?> set = inputOI.getSet(input);
+      if (set == null) {
+        return null;
+      }
+      // Create enough elementConverters
+      // NOTE: we have to have a separate elementConverter for each element,
+      // because the elementConverters can reuse the internal object.
+      // So it's not safe to use the same elementConverter to convert multiple
+      // elements.
+      // Sized by the set that is actually iterated below, not getSetSize().
+      while (elementConverters.size() < set.size()) {
+        elementConverters.add(getConverter(inputElementOI, outputElementOI));
+      }
+
+      // Start from an empty output: elements are only ever added, so reusing
+      // one instance would pile up the elements of every previous input.
+      output = outputOI.create();
+
+      // Convert the elements
+      int index = 0;
+      for (Object inputElement : set) {
+        Object outputElement = elementConverters.get(index++).convert(inputElement);
+        outputOI.add(output, outputElement);
+      }
+      return output;
+    }
+  }
+
+  /**
+   * A converter class for Struct.
*/ public static class StructConverter implements Converter { diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java index 0e39073..c01e996 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java @@ -27,6 +27,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; @@ -115,6 +116,11 @@ public final class ObjectInspectorFactory { return getStandardListObjectInspector(getReflectionObjectInspector(pt .getActualTypeArguments()[0], options)); } + // Set? + if (Set.class.isAssignableFrom((Class) pt.getRawType())) { + return getStandardSetObjectInspector(getReflectionObjectInspector(pt + .getActualTypeArguments()[0], options)); + } // Map? 
if (Map.class.isAssignableFrom((Class) pt.getRawType())) { return getStandardMapObjectInspector(getReflectionObjectInspector(pt @@ -215,6 +221,23 @@ public final class ObjectInspectorFactory { return new StandardConstantListObjectInspector(listElementObjectInspector, constantValue); } + static HashMap cachedStandardSetObjectInspector = new HashMap(); + + public static StandardSetObjectInspector getStandardSetObjectInspector( + ObjectInspector setElementObjectInspector) { + StandardSetObjectInspector result = cachedStandardSetObjectInspector + .get(setElementObjectInspector); + if (result == null) { + result = new StandardSetObjectInspector(setElementObjectInspector); + cachedStandardSetObjectInspector.put(setElementObjectInspector, result); + } + return result; + } + + public static StandardConstantSetObjectInspector getStandardConstantSetObjectInspector( + ObjectInspector setElementObjectInspector, Set constantValue) { + return new StandardConstantSetObjectInspector(setElementObjectInspector, constantValue); + } static HashMap, StandardMapObjectInspector> cachedStandardMapObjectInspector = new HashMap, StandardMapObjectInspector>(); diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java index bedc9dd..113a46d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java @@ -23,9 +23,11 @@ import java.lang.reflect.Modifier; import java.lang.reflect.Type; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -135,6 +137,13 @@ public final class ObjectInspectorUtils { .getListElementObjectInspector(), 
objectInspectorOption)); break; } + case SET: { + SetObjectInspector soi = (SetObjectInspector) oi; + result = ObjectInspectorFactory + .getStandardSetObjectInspector(getStandardObjectInspector(soi + .getSetElementObjectInspector(), objectInspectorOption)); + break; + } case MAP: { MapObjectInspector moi = (MapObjectInspector) oi; result = ObjectInspectorFactory.getStandardMapObjectInspector( @@ -252,6 +261,17 @@ public final class ObjectInspectorUtils { result = list; break; } + case SET: { + SetObjectInspector soi = (SetObjectInspector) oi; + Set inputSet = soi.getSet(o); + Set outputSet = new HashSet(); + for (Object inputElement : inputSet) { + outputSet.add(copyToStandardObject(inputElement, soi + .getSetElementObjectInspector(), objectInspectorOption)); + } + result = outputSet; + break; + } case MAP: { MapObjectInspector moi = (MapObjectInspector) oi; HashMap map = new HashMap(); @@ -378,6 +398,11 @@ public final class ObjectInspectorUtils { return oi.getClass().getSimpleName() + "<" + getObjectInspectorName(loi.getListElementObjectInspector()) + ">"; } + case SET: { + SetObjectInspector soi = (SetObjectInspector) oi; + return oi.getClass().getSimpleName() + "<" + + getObjectInspectorName(soi.getSetElementObjectInspector()) + ">"; + } case MAP: { MapObjectInspector moi = (MapObjectInspector) oi; return oi.getClass().getSimpleName() + "<" @@ -485,6 +510,15 @@ public final class ObjectInspectorUtils { } return r; } + case SET: { + int r = 0; + SetObjectInspector setOI = (SetObjectInspector)objIns; + ObjectInspector elemOI = setOI.getSetElementObjectInspector(); + for (Object inputElement : setOI.getSet(o)) { + r += hashCode(inputElement, elemOI); + } + return r; + } case MAP: { int r = 0; MapObjectInspector mapOI = (MapObjectInspector)objIns; @@ -530,12 +564,18 @@ public final class ObjectInspectorUtils { */ public static boolean compareSupported(ObjectInspector oi) { switch (oi.getCategory()) { - case PRIMITIVE: + case PRIMITIVE: { return true; - case
LIST: + } + case LIST: { ListObjectInspector loi = (ListObjectInspector) oi; return compareSupported(loi.getListElementObjectInspector()); - case STRUCT: + } + case SET: { + SetObjectInspector soi = (SetObjectInspector) oi; + return compareSupported(soi.getSetElementObjectInspector()); + } + case STRUCT: { StructObjectInspector soi = (StructObjectInspector) oi; List fields = soi.getAllStructFieldRefs(); for (int f = 0; f < fields.size(); f++) { @@ -544,9 +584,11 @@ public final class ObjectInspectorUtils { } } return true; - case MAP: + } + case MAP: { return false; - case UNION: + } + case UNION: { UnionObjectInspector uoi = (UnionObjectInspector) oi; for (ObjectInspector eoi : uoi.getObjectInspectors()) { if (!compareSupported(eoi)) { @@ -554,6 +596,7 @@ public final class ObjectInspectorUtils { } } return true; + } default: return false; } @@ -692,6 +735,26 @@ public final class ObjectInspectorUtils { } return loi1.getListLength(o1) - loi2.getListLength(o2); } + case SET: { + SetObjectInspector soi1 = (SetObjectInspector) oi1; + SetObjectInspector soi2 = (SetObjectInspector) oi2; + ObjectInspector seoi1 = soi1.getSetElementObjectInspector(); + ObjectInspector seoi2 = soi2.getSetElementObjectInspector(); + + Set set1 = soi1.getSet(o1); + Set set2 = soi1.getSet(o2); + int minimum = Math.min(soi1.getSetSize(o1), soi2.getSetSize(o2)); + Iterator it1 = set1.iterator(); + Iterator it2 = set2.iterator(); + + for (int i = 0; i < minimum; i++) { + int r = compare(it1.next(), seoi1, it2.next(), seoi2, mapEqualComparer); + if (r != 0) { + return r; + } + } + return soi1.getSetSize(o1) - soi2.getSetSize(o2); + } case MAP: { if (mapEqualComparer == null) { throw new RuntimeException("Compare on map type not supported!"); @@ -808,6 +871,15 @@ public final class ObjectInspectorUtils { return compareTypes(child1, child2); } + // If sets, recursively compare the set element types + if (c1.equals(Category.SET)) { + ObjectInspector child1 = + ((SetObjectInspector) 
o1).getSetElementObjectInspector(); + ObjectInspector child2 = + ((SetObjectInspector) o2).getSetElementObjectInspector(); + return compareTypes(child1, child2); + } + // If maps, recursively compare the key and value types if (c1.equals(Category.MAP)) { MapObjectInspector mapOI1 = (MapObjectInspector) o1; @@ -905,6 +977,14 @@ public final class ObjectInspectorUtils { ObjectInspectorCopyOption.WRITABLE ), (List)writableValue); + case SET: + SetObjectInspector soi = (SetObjectInspector) oi; + return ObjectInspectorFactory.getStandardConstantSetObjectInspector( + getStandardObjectInspector( + soi.getSetElementObjectInspector(), + ObjectInspectorCopyOption.WRITABLE + ), + (Set)writableValue); case MAP: MapObjectInspector moi = (MapObjectInspector) oi; return ObjectInspectorFactory.getStandardConstantMapObjectInspector( @@ -931,6 +1011,7 @@ public final class ObjectInspectorUtils { switch (oi.getCategory()) { case PRIMITIVE: case LIST: + case SET: case MAP: return true; default: diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/SetObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/SetObjectInspector.java new file mode 100644 index 0000000..033d2f9 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/SetObjectInspector.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector; + +import java.util.Set; + +/** + * SetObjectInspector. + * + */ +public interface SetObjectInspector extends ObjectInspector { + + // ** Methods that do not need a data object ** + ObjectInspector getSetElementObjectInspector(); + + // ** Methods that need a data object ** + /** + * returns -1 for data = null. + */ + int getSetSize(Object data); + + /** + * returns null for data = null. + * + * Note: This method should not return a Set object that is reused by the + * same SetObjectInspector, because it's possible that the same + * SetObjectInspector will be used in multiple places in the code. + * + * However it's OK if the Set object is part of the Object data. + */ + Set getSet(Object data); + +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/SettableSetObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/SettableSetObjectInspector.java new file mode 100644 index 0000000..618211c --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/SettableSetObjectInspector.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.objectinspector; + +/** + * SettableSetObjectInspector. + * + */ +public interface SettableSetObjectInspector extends SetObjectInspector { + + /** + * Create an empty set. + */ + Object create(); + + /** + * Add a new element to the set. Return the set. + */ + Object add(Object set, Object element); +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardConstantSetObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardConstantSetObjectInspector.java new file mode 100644 index 0000000..74d5c73 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardConstantSetObjectInspector.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hive.serde2.objectinspector; + +import java.util.Set; + +/** + * A StandardSetObjectInspector which also implements the + * ConstantObjectInspector interface. + * + * Always use the ObjectInspectorFactory to create new ObjectInspector objects, + * instead of directly creating an instance of this class. + */ +public class StandardConstantSetObjectInspector extends StandardSetObjectInspector + implements ConstantObjectInspector { + + private Set value; + + /** + * Call ObjectInspectorFactory.getStandardConstantSetObjectInspector instead. + */ + protected StandardConstantSetObjectInspector( + ObjectInspector setElementObjectInspector, Set value) { + super(setElementObjectInspector); + this.value = value; + } + + @Override + public Set getWritableConstantValue() { + return value; + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardSetObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardSetObjectInspector.java new file mode 100644 index 0000000..75b12fb --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardSetObjectInspector.java @@ -0,0 +1,94 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.objectinspector; + +import java.util.HashSet; +import java.util.Set; + +/** + * StandardSetObjectInspector works on Set data that is stored as a Java Set object. + * + * Always use the ObjectInspectorFactory to create new ObjectInspector objects, + * instead of directly creating an instance of this class. + */ +public class StandardSetObjectInspector implements SettableSetObjectInspector { + + ObjectInspector setElementObjectInspector; + + /** + * Call ObjectInspectorFactory.getStandardSetObjectInspector instead. + */ + protected StandardSetObjectInspector( + ObjectInspector setElementObjectInspector) { + this.setElementObjectInspector = setElementObjectInspector; + } + + // ///////////////////////////// + // ObjectInspector + @Override + public final Category getCategory() { + return Category.SET; + } + + @Override + public String getTypeName() { + return org.apache.hadoop.hive.serde.Constants.SET_TYPE_NAME + "<" + + setElementObjectInspector.getTypeName() + ">"; + } + + // ///////////////////////////// + // SetObjectInspector + @Override + public ObjectInspector getSetElementObjectInspector() { + return setElementObjectInspector; + } + + @Override + public int getSetSize(Object data) { + if (data == null) { + return -1; + } + Set set = (Set) data; + return set.size(); + } + + @Override + public Set getSet(Object data) { + if (data == null) { + return null; + } + Set set = (Set) data; + return set; + } + + // ///////////////////////////// + // SettableSetObjectInspector + @Override + public Object create() { + Set set = new HashSet(); + return set; + } + + @Override + public Object add(Object set, Object element) { + Set realSet = (Set) set; + realSet.add(element); + return realSet; + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/SetTypeInfo.java
serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/SetTypeInfo.java new file mode 100644 index 0000000..86d28e2 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/SetTypeInfo.java @@ -0,0 +1,89 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.typeinfo; + +import java.io.Serializable; + +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; + +/** + * A Set Type has homogeneous elements. All elements of the Set have the same + * TypeInfo which is returned by getSetElementTypeInfo. + * + * Always use the TypeInfoFactory to create new TypeInfo objects, instead of + * directly creating an instance of this class. + */ +public final class SetTypeInfo extends TypeInfo implements Serializable { + + private static final long serialVersionUID = 1L; + private TypeInfo setElementTypeInfo; + + /** + * For java serialization use only. + */ + public SetTypeInfo() { + } + + @Override + public String getTypeName() { + return org.apache.hadoop.hive.serde.Constants.SET_TYPE_NAME + "<" + + setElementTypeInfo.getTypeName() + ">"; + } + + /** + * For java serialization use only.
+ */ + public void setSetElementTypeInfo(TypeInfo setElementTypeInfo) { + this.setElementTypeInfo = setElementTypeInfo; + } + + /** + * For TypeInfoFactory use only. + */ + SetTypeInfo(TypeInfo elementTypeInfo) { + setElementTypeInfo = elementTypeInfo; + } + + @Override + public Category getCategory() { + return Category.SET; + } + + public TypeInfo getSetElementTypeInfo() { + return setElementTypeInfo; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (!(other instanceof SetTypeInfo)) { + return false; + } + return getSetElementTypeInfo().equals( + ((SetTypeInfo) other).getSetElementTypeInfo()); + } + + @Override + public int hashCode() { + return setElementTypeInfo.hashCode(); + } + +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java index 594f5ab..ca02fa5 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java @@ -122,6 +122,17 @@ public final class TypeInfoFactory { return result; } + static HashMap cachedSetTypeInfo = new HashMap(); + + public static TypeInfo getSetTypeInfo(TypeInfo elementTypeInfo) { + TypeInfo result = cachedSetTypeInfo.get(elementTypeInfo); + if (result == null) { + result = new SetTypeInfo(elementTypeInfo); + cachedSetTypeInfo.put(elementTypeInfo, result); + } + return result; + } + static HashMap, TypeInfo> cachedMapTypeInfo = new HashMap, TypeInfo>(); diff --git serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java index 6c07ab5..e713dbe 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java @@ -24,8 +24,10 @@ import java.lang.reflect.ParameterizedType; import 
java.lang.reflect.Type; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; @@ -34,10 +36,11 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.SetObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry; @@ -75,6 +78,12 @@ public final class TypeInfoUtils { return TypeInfoFactory.getListTypeInfo(getExtendedTypeInfoFromJavaType( pt.getActualTypeArguments()[0], m)); } + // Set? + if (Set.class == (Class) pt.getRawType() + || HashSet.class == (Class) pt.getRawType()) { + return TypeInfoFactory.getSetTypeInfo(getExtendedTypeInfoFromJavaType( + pt.getActualTypeArguments()[0], m)); + } // Map? 
if (Map.class == (Class) pt.getRawType() || HashMap.class == (Class) pt.getRawType()) { @@ -292,6 +301,7 @@ public final class TypeInfoUtils { Token t = typeInfoTokens.get(iToken); if (item.equals("type")) { if (!Constants.LIST_TYPE_NAME.equals(t.text) + && !Constants.SET_TYPE_NAME.equals(t.text) && !Constants.MAP_TYPE_NAME.equals(t.text) && !Constants.STRUCT_TYPE_NAME.equals(t.text) && !Constants.UNION_TYPE_NAME.equals(t.text) @@ -339,6 +349,14 @@ public final class TypeInfoUtils { return TypeInfoFactory.getListTypeInfo(listElementType); } + // Is this a set type? + if (Constants.SET_TYPE_NAME.equals(t.text)) { + expect("<"); + TypeInfo setElementType = parseType(); + expect(">"); + return TypeInfoFactory.getSetTypeInfo(setElementType); + } + // Is this a map type? if (Constants.MAP_TYPE_NAME.equals(t.text)) { expect("<"); @@ -426,6 +444,14 @@ public final class TypeInfoUtils { .getStandardListObjectInspector(elementObjectInspector); break; } + case SET: { + ObjectInspector elementObjectInspector = + getStandardWritableObjectInspectorFromTypeInfo(((SetTypeInfo) typeInfo) + .getSetElementTypeInfo()); + result = ObjectInspectorFactory + .getStandardSetObjectInspector(elementObjectInspector); + break; + } case MAP: { MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; ObjectInspector keyObjectInspector = @@ -505,6 +531,14 @@ public final class TypeInfoUtils { .getStandardListObjectInspector(elementObjectInspector); break; } + case SET: { + ObjectInspector elementObjectInspector = + getStandardJavaObjectInspectorFromTypeInfo(((SetTypeInfo) typeInfo) + .getSetElementTypeInfo()); + result = ObjectInspectorFactory + .getStandardSetObjectInspector(elementObjectInspector); + break; + } case MAP: { MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo; ObjectInspector keyObjectInspector = getStandardJavaObjectInspectorFromTypeInfo(mapTypeInfo @@ -585,6 +619,13 @@ public final class TypeInfoUtils { .getListElementObjectInspector())); break; } + case SET: { + SetObjectInspector soi 
= (SetObjectInspector) oi; + result = TypeInfoFactory + .getSetTypeInfo(getTypeInfoFromObjectInspector(soi + .getSetElementObjectInspector())); + break; + } case MAP: { MapObjectInspector moi = (MapObjectInspector) oi; result = TypeInfoFactory.getMapTypeInfo( diff --git serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestSetObjectInspectors.java serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestSetObjectInspectors.java new file mode 100644 index 0000000..50a29af --- /dev/null +++ serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestSetObjectInspectors.java @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.objectinspector; + +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import junit.framework.TestCase; + +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.thrift.test.MegaStruct; +import org.apache.hadoop.hive.serde2.thrift.test.MiniStruct; +import org.apache.hadoop.hive.serde2.thrift.test.MyEnum; + +/** + * TestSetObjectInspectors. + * + */ +public class TestSetObjectInspectors extends TestCase { + + public void testSetObjectInspectors() throws Throwable { + + try { + ObjectInspector oi1 = ObjectInspectorFactory + .getReflectionObjectInspector(MegaStruct.class, + ObjectInspectorFactory.ObjectInspectorOptions.THRIFT); + ObjectInspector oi2 = ObjectInspectorFactory + .getReflectionObjectInspector(MegaStruct.class, + ObjectInspectorFactory.ObjectInspectorOptions.THRIFT); + assertEquals(oi1, oi2); + + // metadata + assertEquals(Category.STRUCT, oi1.getCategory()); + StructObjectInspector soi = (StructObjectInspector) oi1; + List fields = soi.getAllStructFieldRefs(); + assertEquals(20, fields.size()); + assertEquals(fields.get(19), soi.getStructFieldRef("my_structset")); + + // null + for (int i = 0; i < fields.size(); i++) { + assertNull(soi.getStructFieldData(null, fields.get(i))); + } + + // real object + MegaStruct ms = new MegaStruct(); + Set myStringSet = new HashSet(); + myStringSet.add("String1"); + myStringSet.add("String2"); + ms.setMy_stringset(myStringSet); + + Set myEnumSet = new HashSet(); + myEnumSet.add(MyEnum.ALPACA); + myEnumSet.add(MyEnum.LLAMA); + ms.setMy_enumset(myEnumSet); + + Set myMiniStructSet = new HashSet(); + MiniStruct miniStruct1 = new MiniStruct(); + miniStruct1.setMy_enum(MyEnum.ALPACA); + miniStruct1.setMy_string("string3"); + myMiniStructSet.add(miniStruct1); + ms.setMy_structset(myMiniStructSet); + + 
assertEquals(myStringSet, soi.getStructFieldData(ms, fields.get(17))); + assertEquals(myEnumSet, soi.getStructFieldData(ms, fields.get(18))); + assertEquals(myMiniStructSet, soi.getStructFieldData(ms, fields.get(19))); + + // element inspectors for String and Enum + assertEquals(PrimitiveObjectInspectorFactory.javaStringObjectInspector, + ((StandardSetObjectInspector)fields.get(17).getFieldObjectInspector()) + .getSetElementObjectInspector()); + assertEquals(PrimitiveObjectInspectorFactory.javaStringObjectInspector, + ((StandardSetObjectInspector)fields.get(18).getFieldObjectInspector()) + .getSetElementObjectInspector()); + + // sub fields for nested struct + StandardSetObjectInspector setOI = (StandardSetObjectInspector)fields.get(19) + .getFieldObjectInspector(); + StructObjectInspector fieldOI = (StructObjectInspector)setOI.getSetElementObjectInspector(); + List setFields = fieldOI.getAllStructFieldRefs(); + + assertEquals(2, setFields.size()); + assertEquals(1, setOI.getSetSize(myMiniStructSet)); + assertEquals(Category.SET, setOI.getCategory()); + assertEquals("set>", setOI.getTypeName()); + + // data for nested struct + Set readSetData = (Set)setOI.getSet(myMiniStructSet); + MiniStruct miniStruct2 = (MiniStruct)readSetData.toArray()[0]; + + assertEquals(1, readSetData.size()); + assertEquals(MyEnum.ALPACA, miniStruct2.getMy_enum()); + assertEquals("string3", miniStruct2.getMy_string()); + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } +}