Index: data/files/union_input.txt =================================================================== --- data/files/union_input.txt (revision 0) +++ data/files/union_input.txt (revision 0) @@ -0,0 +1,8 @@ +011oneone +12.02twotwo +2threefour3threefour +35five5fivefive +2sixseven6sixseven +38eight8eighteight +099ninenine +110.010tenten Index: serde/src/gen-py/org_apache_hadoop_hive_serde/constants.py =================================================================== --- serde/src/gen-py/org_apache_hadoop_hive_serde/constants.py (revision 1005331) +++ serde/src/gen-py/org_apache_hadoop_hive_serde/constants.py (working copy) @@ -65,6 +65,8 @@ STRUCT_TYPE_NAME = "struct" +UNION_TYPE_NAME = "uniontype" + LIST_COLUMNS = "columns" LIST_COLUMN_TYPES = "columns.types" Index: serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java (revision 1005331) +++ serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java (working copy) @@ -32,7 +32,7 @@ import org.apache.hadoop.io.Text; /** - * TestLazyArrayMapStruct. + * Tests LazyArray, LazyMap, LazyStruct and LazyUnion * */ public class TestLazyArrayMapStruct extends TestCase { @@ -244,4 +244,71 @@ } } + /** + * Test the LazyUnion class. 
+ */ + public void testLazyUnion() throws Throwable { + try { + { + TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString( + "uniontype<int,array<string>,map<string,string>,string>"); /* tag 0 = int, 1 = array<string>, 2 = map<string,string>, 3 = string */ + Text nullSequence = new Text("\\N"); + + ObjectInspector oi = LazyFactory.createLazyObjectInspector(typeInfo, + new byte[] {'^', ':', '='}, 0, nullSequence, false, (byte) 0); /* '^' ends the tag; ':' splits list items and map entries; '=' splits a map key from its value */ + LazyUnion o = (LazyUnion) LazyFactory.createLazyObject(oi); + + Text data; + + data = new Text("0^123"); + TestLazyPrimitive.initLazyObject(o, data.getBytes(), 0, + data.getLength()); + assertEquals("{0:123}", SerDeUtils.getJSONString(o, oi)); + + data = new Text("1^a:b:c"); + TestLazyPrimitive.initLazyObject(o, data.getBytes(), 0, + data.getLength()); + assertEquals( + "{1:[\"a\",\"b\",\"c\"]}", SerDeUtils.getJSONString(o, oi)); + + data = new Text("2^d=e:f=g"); + TestLazyPrimitive.initLazyObject(o, data.getBytes(), 0, + data.getLength()); + assertEquals( + "{2:{\"d\":\"e\",\"f\":\"g\"}}", SerDeUtils.getJSONString(o, oi)); + + data = new Text("3^hi"); + TestLazyPrimitive.initLazyObject(o, data.getBytes(), 0, + data.getLength()); + assertEquals("{3:\"hi\"}", SerDeUtils.getJSONString(o, oi)); /* cases below: null sequence, empty elements, map entries without a value */ + + + data = new Text("0^\\N"); + TestLazyPrimitive.initLazyObject(o, data.getBytes(), 0, + data.getLength()); + assertEquals("{0:null}", SerDeUtils.getJSONString(o, oi)); + + data = new Text("1^ :a::"); + TestLazyPrimitive.initLazyObject(o, data.getBytes(), 0, + data.getLength()); + assertEquals( + "{1:[\" \",\"a\",\"\",\"\"]}", SerDeUtils.getJSONString(o, oi)); + + data = new Text("2^d=\\N:f=g:h"); + TestLazyPrimitive.initLazyObject(o, data.getBytes(), 0, + data.getLength()); + assertEquals( + "{2:{\"d\":null,\"f\":\"g\",\"h\":null}}", + SerDeUtils.getJSONString(o, oi)); + + data = new Text("2^= "); + TestLazyPrimitive.initLazyObject(o, data.getBytes(), 0, + data.getLength()); + assertEquals("{2:{\"\":\" \"}}", SerDeUtils.getJSONString(o, oi)); + } + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } } Index:
serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java (revision 1005331) +++ serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestStandardObjectInspectors.java (working copy) @@ -23,12 +23,17 @@ import junit.framework.TestCase; +import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; @@ -334,4 +339,180 @@ } + @SuppressWarnings("unchecked") + public void testStandardUnionObjectInspector() throws Throwable { + try { + ArrayList objectInspectors = new ArrayList(); + // add primitive types + objectInspectors + .add(PrimitiveObjectInspectorFactory.javaIntObjectInspector); + objectInspectors + .add(PrimitiveObjectInspectorFactory.javaStringObjectInspector); + objectInspectors + .add(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector); + + // add a list + objectInspectors + .add(ObjectInspectorFactory + .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector)); + + // add a map + objectInspectors + 
.add(ObjectInspectorFactory + .getStandardMapObjectInspector( + PrimitiveObjectInspectorFactory.javaIntObjectInspector, + PrimitiveObjectInspectorFactory.javaStringObjectInspector)); + + // add a struct + List fieldNames = new ArrayList(); + fieldNames.add("myDouble"); + fieldNames.add("myLong"); + ArrayList fieldObjectInspectors = new ArrayList(); + fieldObjectInspectors + .add(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector); + fieldObjectInspectors + .add(PrimitiveObjectInspectorFactory.javaLongObjectInspector); + objectInspectors + .add(ObjectInspectorFactory + .getStandardStructObjectInspector(fieldNames, fieldObjectInspectors)); + + StandardUnionObjectInspector uoi1 = ObjectInspectorFactory + .getStandardUnionObjectInspector(objectInspectors); + StandardUnionObjectInspector uoi2 = ObjectInspectorFactory + .getStandardUnionObjectInspector( + (ArrayList) objectInspectors.clone()); + assertEquals(uoi1, uoi2); + assertEquals(ObjectInspectorUtils.getObjectInspectorName(uoi1), + ObjectInspectorUtils.getObjectInspectorName(uoi2)); + assertTrue(ObjectInspectorUtils.compareTypes(uoi1, uoi2)); + // compareSupported returns false because Union can contain + // an object of Map + assertFalse(ObjectInspectorUtils.compareSupported(uoi1)); + + // construct unionObjectInspector without Map field. 
+ ArrayList ois = + (ArrayList) objectInspectors.clone(); + ois.set(4, PrimitiveObjectInspectorFactory.javaIntObjectInspector); + assertTrue(ObjectInspectorUtils.compareSupported(ObjectInspectorFactory + .getStandardUnionObjectInspector(ois))); + + // metadata + assertEquals(Category.UNION, uoi1.getCategory()); + List uois = uoi1.getObjectInspectors(); + assertEquals(6, uois.size()); + for (int i = 0; i < 6; i++) { + assertEquals(objectInspectors.get(i), uois.get(i)); + } + StringBuilder unionTypeName = new StringBuilder(); + unionTypeName.append("uniontype<"); + for (int i = 0; i < uois.size(); i++) { + if (i > 0) { + unionTypeName.append(","); + } + unionTypeName.append(uois.get(i).getTypeName()); + } + unionTypeName.append(">"); + assertEquals(unionTypeName.toString(), uoi1.getTypeName()); + // TypeInfo + TypeInfo typeInfo1 = TypeInfoUtils.getTypeInfoFromObjectInspector(uoi1); + assertEquals(Category.UNION, typeInfo1.getCategory()); + assertEquals(UnionTypeInfo.class.getName(), typeInfo1.getClass().getName()); + assertEquals(typeInfo1.getTypeName(), uoi1.getTypeName()); + assertEquals(typeInfo1, + TypeInfoUtils.getTypeInfoFromTypeString(uoi1.getTypeName())); + TypeInfo typeInfo2 = TypeInfoUtils.getTypeInfoFromObjectInspector(uoi2); + assertEquals(typeInfo1, typeInfo2); + assertEquals(TypeInfoUtils. + getStandardJavaObjectInspectorFromTypeInfo(typeInfo1), TypeInfoUtils. + getStandardJavaObjectInspectorFromTypeInfo(typeInfo2)); + assertEquals(TypeInfoUtils. 
+ getStandardWritableObjectInspectorFromTypeInfo(typeInfo1), + TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo( + typeInfo2)); + + // null + assertNull(uoi1.getField(null)); + assertEquals(-1, uoi1.getTag(null)); + + // Union + UnionObject union = new StandardUnion((byte) 0, 1); + assertEquals(0, uoi1.getTag(union)); + assertEquals(1, uoi1.getField(union)); + assertEquals("{0:1}", SerDeUtils.getJSONString(union, uoi1)); + assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, + new StandardUnion((byte) 0, 1), uoi2)); + assertTrue(ObjectInspectorUtils.copyToStandardObject( + union, uoi1).equals(1)); + + union = new StandardUnion((byte) 1, "two"); + assertEquals(1, uoi1.getTag(union)); + assertEquals("two", uoi1.getField(union)); + assertEquals("{1:\"two\"}", SerDeUtils.getJSONString(union, uoi1)); + assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, + new StandardUnion((byte) 1, "two"), uoi2)); + assertTrue(ObjectInspectorUtils.copyToStandardObject( + union, uoi1).equals("two")); + + union = new StandardUnion((byte) 2, true); + assertEquals(2, uoi1.getTag(union)); + assertEquals(true, uoi1.getField(union)); + assertEquals("{2:true}", SerDeUtils.getJSONString(union, uoi1)); + assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, + new StandardUnion((byte) 2, true), uoi2)); + assertTrue(ObjectInspectorUtils.copyToStandardObject( + union, uoi1).equals(true)); + + ArrayList iList = new ArrayList(); + iList.add(4); + iList.add(5); + union = new StandardUnion((byte) 3, iList); + assertEquals(3, uoi1.getTag(union)); + assertEquals(iList, uoi1.getField(union)); + assertEquals("{3:[4,5]}", SerDeUtils.getJSONString(union, uoi1)); + assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, + new StandardUnion((byte) 3, iList.clone()), uoi2)); + assertTrue(ObjectInspectorUtils.copyToStandardObject( + union, uoi1).equals(iList)); + + HashMap map = new HashMap(); + map.put(6, "six"); + map.put(7, "seven"); + map.put(8, "eight"); + union = new 
StandardUnion((byte) 4, map); + assertEquals(4, uoi1.getTag(union)); + assertEquals(map, uoi1.getField(union)); + assertEquals("{4:{6:\"six\",7:\"seven\",8:\"eight\"}}", + SerDeUtils.getJSONString(union, uoi1)); + Throwable th = null; + try { + ObjectInspectorUtils.compare(union, uoi1, + new StandardUnion((byte) 4, map.clone()), uoi2); + } catch (Throwable t) { + th = t; + } + assertNotNull(th); + assertEquals("Compare on map type not supported!", th.getMessage()); + assertTrue(ObjectInspectorUtils.copyToStandardObject( + union, uoi1).equals(map)); + + + ArrayList struct = new ArrayList(2); + struct.add(9.0); + struct.add(10L); + union = new StandardUnion((byte) 5, struct); + assertEquals(5, uoi1.getTag(union)); + assertEquals(struct, uoi1.getField(union)); + assertEquals("{5:{\"mydouble\":9.0,\"mylong\":10}}", + SerDeUtils.getJSONString(union, uoi1)); + assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, + new StandardUnion((byte) 5, struct.clone()), uoi2)); + assertTrue(ObjectInspectorUtils.copyToStandardObject( + union, uoi1).equals(struct)); + + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } + } Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java (revision 1005331) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java (working copy) @@ -24,6 +24,7 @@ import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBooleanObjectInspector; import 
org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyByteObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector; @@ -42,6 +43,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.io.Text; /** @@ -92,6 +94,8 @@ return new LazyArray((LazyListObjectInspector) oi); case STRUCT: return new LazyStruct((LazySimpleStructObjectInspector) oi); + case UNION: + return new LazyUnion((LazyUnionObjectInspector) oi); } throw new RuntimeException("Hive LazySerDe Internal error."); @@ -152,6 +156,16 @@ return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector( fieldNames, fieldObjectInspectors, separator[separatorIndex], nullSequence, false, escaped, escapeChar); + case UNION: + UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; + List lazyOIs = new ArrayList(); + for (TypeInfo uti : unionTypeInfo.getAllUnionObjectTypeInfos()) { + lazyOIs.add(createLazyObjectInspector(uti, separator, + separatorIndex + 1, nullSequence, escaped, + escapeChar)); + } + return LazyObjectInspectorFactory.getLazyUnionObjectInspector(lazyOIs, + separator[separatorIndex], nullSequence, escaped, escapeChar); } throw new RuntimeException("Hive LazySerDe Internal error."); Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java (revision 1005331) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java (working copy) @@ -38,7 +38,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import 
org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; @@ -501,6 +503,22 @@ } } return; + case UNION: + separator = (char) separators[level]; + UnionObjectInspector uoi = (UnionObjectInspector) objInspector; + List ois = uoi.getObjectInspectors(); + if (ois == null) { + out.write(nullSequence.getBytes(), 0, nullSequence.getLength()); + } else { + LazyUtils.writePrimitiveUTF8(out, new Byte(uoi.getTag(obj)), + PrimitiveObjectInspectorFactory.javaByteObjectInspector, + escaped, escapeChar, needsEscape); + out.write(separator); + serialize(out, uoi.getField(obj), ois.get(uoi.getTag(obj)), + separators, level + 1, nullSequence, escaped, escapeChar, + needsEscape); + } + return; default: break; } Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java (revision 1005331) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java (working copy) @@ -112,6 +112,29 @@ return result; } + static HashMap, LazyUnionObjectInspector> + cachedLazyUnionObjectInspector = + new HashMap, LazyUnionObjectInspector>(); + + public static LazyUnionObjectInspector getLazyUnionObjectInspector( + List ois, byte separator, Text nullSequence, + boolean escaped, byte escapeChar) { + List signature = new ArrayList(); + signature.add(ois); + signature.add(Byte.valueOf(separator)); + 
signature.add(nullSequence.toString()); + signature.add(Boolean.valueOf(escaped)); + signature.add(Byte.valueOf(escapeChar)); + LazyUnionObjectInspector result = cachedLazyUnionObjectInspector + .get(signature); + if (result == null) { + result = new LazyUnionObjectInspector(ois, separator, + nullSequence, escaped, escapeChar); + cachedLazyUnionObjectInspector.put(signature, result); + } + return result; + } + private LazyObjectInspectorFactory() { // prevent instantiation } Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyUnionObjectInspector.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyUnionObjectInspector.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyUnionObjectInspector.java (revision 0) @@ -0,0 +1,130 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.serde2.lazy.objectinspector; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.serde2.lazy.LazyUnion; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.io.Text; + +/** + * LazyUnionObjectInspector works on union data that is stored in LazyUnion. + * + * Always use the {@link LazyObjectInspectorFactory} to create new + * ObjectInspector objects, instead of directly creating an instance of this + * class. + */ +public class LazyUnionObjectInspector implements UnionObjectInspector { + + public static final Log LOG = LogFactory + .getLog(LazyUnionObjectInspector.class.getName()); + + protected List ois; + + @Override + public String getTypeName() { + return ObjectInspectorUtils.getStandardUnionTypeName(this); + } + + byte separator; + Text nullSequence; + boolean escaped; + byte escapeChar; + + protected LazyUnionObjectInspector( + List ois, byte separator, + Text nullSequence, boolean escaped, + byte escapeChar) { + init(ois, separator, + nullSequence, escaped, escapeChar); + } + + protected void init( + List ois, byte separator, + Text nullSequence, boolean escaped, + byte escapeChar) { + this.separator = separator; + this.nullSequence = nullSequence; + this.escaped = escaped; + this.escapeChar = escapeChar; + this.ois = new ArrayList(); + this.ois.addAll(ois); + } + + protected LazyUnionObjectInspector(List ois, + byte separator, Text nullSequence) { + init(ois, separator, nullSequence); + } + + protected void init(List ois, byte separator, + Text nullSequence) { + this.separator = separator; + this.nullSequence = nullSequence; + this.ois = new ArrayList(); + this.ois.addAll(ois); + } + + @Override + public 
final Category getCategory() { + return Category.UNION; + } + + public byte getSeparator() { + return separator; + } + + public Text getNullSequence() { + return nullSequence; + } + + public boolean isEscaped() { + return escaped; + } + + public byte getEscapeChar() { + return escapeChar; + } + + @Override + public Object getField(Object data) { + if (data == null) { + return null; + } + return ((LazyUnion) data).getField(); + } + + @Override + public List getObjectInspectors() { + return ois; + } + + @Override + public byte getTag(Object data) { + if (data == null) { + return -1; + } + return ((LazyUnion) data).getTag(); + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java (revision 0) @@ -0,0 +1,165 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.serde2.lazy; + +import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.io.Text; + +/** + * LazyObject for storing a union. The field of a union can be primitive or + * non-primitive. + * + */ +public class LazyUnion extends + LazyNonPrimitive<LazyUnionObjectInspector> { + /** + * Whether the data is already parsed or not. + */ + private boolean parsed; + + /** + * The start position of union field. Only valid when the data is parsed. + */ + private int startPosition; + + /** + * The lazy object for the field selected by the tag; created by parse(). + */ + private LazyObject field; + + /** + * Tag of the union: the index of the held field's ObjectInspector. + */ + private byte tag; + + /** + * Whether init() has been called on the field or not. + */ + private boolean fieldInited = false; + + /** + * Construct a LazyUnion object with the ObjectInspector. + */ + public LazyUnion(LazyUnionObjectInspector oi) { + super(oi); + } + + /** + * Set the row data for this LazyUnion and mark it as not yet parsed. + * + * @see LazyObject#init(ByteArrayRef, int, int) + */ + @Override + public void init(ByteArrayRef bytes, int start, int length) { + super.init(bytes, start, length); + parsed = false; + } + + /** + * Parse the byte[]: read the tag and locate the start of the field.
+ */ + @SuppressWarnings("unchecked") + private void parse() { + + byte separator = oi.getSeparator(); + boolean isEscaped = oi.isEscaped(); + byte escapeChar = oi.getEscapeChar(); + boolean tagStarted = false; + boolean tagParsed = false; + int tagStart = -1; + int tagEnd = -1; + + int unionByteEnd = start + length; + int fieldByteEnd = start; + byte[] bytes = this.bytes.getData(); + // Scan all bytes: the first separator ends the tag, the field starts right after it + while (fieldByteEnd < unionByteEnd) { + if (bytes[fieldByteEnd] != separator) { + if (isEscaped && bytes[fieldByteEnd] == escapeChar + && fieldByteEnd + 1 < unionByteEnd) { + // ignore the char after escape_char (so an escaped separator is skipped) + fieldByteEnd += 1; + } else { + if (!tagStarted) { + tagStart = fieldByteEnd; + tagStarted = true; + } + } + } else { // (bytes[fieldByteEnd] == separator) + if (!tagParsed) { + // First separator: the tag ends just before it; later separators are left to the field + tagEnd = fieldByteEnd - 1; + startPosition = fieldByteEnd + 1; + tagParsed = true; + } + } + fieldByteEnd++; + } + + tag = LazyByte.parseByte(bytes, tagStart, (tagEnd - tagStart) + 1); /* NOTE(review): if no separator was seen, tagEnd is still -1 and startPosition is not refreshed - confirm callers never pass such input */ + field = LazyFactory.createLazyObject(oi.getObjectInspectors().get(tag)); /* lazy object for the inspector at index tag */ + fieldInited = false; + parsed = true; + } + + /** + * Get the field out of the union without checking parsed. + * + * @return field.getObject(), or null when the field bytes equal the null sequence + */ + private Object uncheckedGetField() { + Text nullSequence = oi.getNullSequence(); + int fieldLength = start + length - startPosition; + if (fieldLength != 0 && fieldLength == nullSequence.getLength() && + LazyUtils.compare(bytes.getData(), startPosition, fieldLength, + nullSequence.getBytes(), 0, nullSequence.getLength()) == 0) { + return null; + } + + if (!fieldInited) { /* init the field at most once per parse(), which resets this flag */ + fieldInited = true; + field.init(bytes, startPosition, fieldLength); + } + return field.getObject(); + } + + /** + * Get the field out of the union.
+ * + * @return The field as a LazyObject + */ + public Object getField() { + if (!parsed) { + parse(); + } + return uncheckedGetField(); + } + + /** + * Get the tag of the union + * + * @return The tag byte + */ + public byte getTag() { + if (!parsed) { + parse(); + } + return tag; + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java (revision 1005331) +++ serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/BinarySortableSerDe.java (working copy) @@ -41,6 +41,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; @@ -56,6 +58,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; @@ -390,6 +393,17 @@ } return r; } + case UNION: { + UnionTypeInfo utype = (UnionTypeInfo) type; + StandardUnion r = reuse == null ? 
new StandardUnion() + : (StandardUnion) reuse; + // Read the tag + byte tag = buffer.read(invert); + r.setTag(tag); + r.setObject(deserialize(buffer, utype.getAllUnionObjectTypeInfos().get(tag), + invert, null)); + return r; + } default: { throw new RuntimeException("Unrecognized type: " + type.getCategory()); } @@ -570,6 +584,14 @@ } return; } + case UNION: { + UnionObjectInspector uoi = (UnionObjectInspector) oi; + byte tag = uoi.getTag(o); + buffer.write(tag, invert); + serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), + invert); + return; + } default: { throw new RuntimeException("Unrecognized type: " + oi.getCategory()); } Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/UnionObjectInspector.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/UnionObjectInspector.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/UnionObjectInspector.java (revision 0) @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.serde2.objectinspector; + +import java.util.List; + +/** + * UnionObjectInspector works on union data, such as data stored as a UnionObject. + * + * It holds the list of the object inspectors corresponding to each type of the + * object the Union can hold. + * + * The tag of a union value is used as the index into that list. + * + */ +public interface UnionObjectInspector extends ObjectInspector { + + /** + * Returns the list of ObjectInspectors, one for each of the tags. + */ + List<ObjectInspector> getObjectInspectors(); + + /** + * Return the tag of the object. + */ + byte getTag(Object o); + + /** + * Return the field based on the tag associated with the Object. + */ + Object getField(Object o); + +} Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java (revision 1005331) +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java (working copy) @@ -29,6 +29,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.serde.Constants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; @@ -137,6 +138,15 @@ fieldNames, fieldObjectInspectors); break; } + case UNION: { + UnionObjectInspector uoi = (UnionObjectInspector) oi; + List<ObjectInspector> ois = new ArrayList<ObjectInspector>(); /* standard-form inspector for every alternative of the union */ + for (ObjectInspector eoi : uoi.getObjectInspectors()) { + ois.add(getStandardObjectInspector(eoi, objectInspectorOption)); + } + result = ObjectInspectorFactory.getStandardUnionObjectInspector(ois); + break; + } default: { throw new RuntimeException("Unknown ObjectInspector category!"); } @@ -243,6 +253,16 @@ result = struct; break; } +
case UNION: { + UnionObjectInspector uoi = (UnionObjectInspector)oi; + List objectInspectors = uoi.getObjectInspectors(); + Object object = copyToStandardObject( + uoi.getField(o), + objectInspectors.get(uoi.getTag(o)), + objectInspectorOption); + result = object; + break; + } default: { throw new RuntimeException("Unknown ObjectInspector category!"); } @@ -266,6 +286,20 @@ return sb.toString(); } + public static String getStandardUnionTypeName(UnionObjectInspector uoi) { + StringBuilder sb = new StringBuilder(); + sb.append(Constants.UNION_TYPE_NAME + "<"); + List ois = uoi.getObjectInspectors(); + for(int i = 0; i < ois.size(); i++) { + if (i > 0) { + sb.append(","); + } + sb.append(ois.get(i).getTypeName()); + } + sb.append(">"); + return sb.toString(); + } + public static StructField getStandardStructFieldRef(String fieldName, List fields) { fieldName = fieldName.toLowerCase(); @@ -344,6 +378,20 @@ } return result.toString(); } + case UNION: { + StringBuffer result = new StringBuffer(); + result.append(oi.getClass().getSimpleName() + "<"); + UnionObjectInspector uoi = (UnionObjectInspector)oi; + List ois = uoi.getObjectInspectors(); + for (int i = 0; i < ois.size(); i++) { + if (i > 0) { + result.append(","); + } + result.append(getObjectInspectorName(ois.get(i))); + } + result.append(">"); + return result.toString(); + } default: { throw new RuntimeException("Unknown ObjectInspector category!"); } @@ -400,6 +448,7 @@ case STRUCT: case LIST: case MAP: + case UNION: default: throw new RuntimeException( "Hash code on complex types not supported yet."); @@ -447,6 +496,14 @@ return true; case MAP: return false; + case UNION: + UnionObjectInspector uoi = (UnionObjectInspector) oi; + for (ObjectInspector eoi : uoi.getObjectInspectors()) { + if (!compareSupported(eoi)) { + return false; + } + } + return true; default: return false; } @@ -565,6 +622,18 @@ case MAP: { throw new RuntimeException("Compare on map type not supported!"); } + case UNION: { + 
UnionObjectInspector uoi1 = (UnionObjectInspector) oi1; + UnionObjectInspector uoi2 = (UnionObjectInspector) oi2; + byte tag1 = uoi1.getTag(o1); + byte tag2 = uoi2.getTag(o2); + if (tag1 != tag2) { + return tag1 - tag2; + } + return compare(uoi1.getField(o1), + uoi1.getObjectInspectors().get(tag1), + uoi2.getField(o2), uoi2.getObjectInspectors().get(tag2)); + } default: throw new RuntimeException("Compare on unknown type: " + oi1.getCategory()); @@ -714,6 +783,29 @@ return true; } + if (c1.equals(Category.UNION)) { + UnionObjectInspector uoi1 = (UnionObjectInspector) o1; + UnionObjectInspector uoi2 = (UnionObjectInspector) o2; + List ois1 = uoi1.getObjectInspectors(); + List ois2 = uoi2.getObjectInspectors(); + + if (ois1 == null && ois2 == null) { + return true; + } else if (ois1 == null || ois2 == null) { + return false; + } else if (ois1.size() != ois2.size()) { + return false; + } + Iterator it1 = ois1.iterator(); + Iterator it2 = ois2.iterator(); + while (it1.hasNext()) { + if (!compareTypes(it1.next(), it2.next())) { + return false; + } + } + return true; + } + // Unknown category throw new RuntimeException("Unknown category encountered: " + c1); } Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/UnionObject.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/UnionObject.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/UnionObject.java (revision 0) @@ -0,0 +1,24 @@ +package org.apache.hadoop.hive.serde2.objectinspector; + +/** + * The UnionObject. + * + * It has tag followed by the object it is holding. + * + */ +public interface UnionObject { + /** + * Get the tag of the union. + * + * @return the tag byte + */ + byte getTag(); + + /** + * Get the Object. 
+ * + * @return The Object union is holding + */ + Object getObject(); + +} Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java (revision 1005331) +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java (working copy) @@ -223,6 +223,21 @@ return result; } + static HashMap, StandardUnionObjectInspector> + cachedStandardUnionObjectInspector = + new HashMap, StandardUnionObjectInspector>(); + + public static StandardUnionObjectInspector getStandardUnionObjectInspector( + List unionObjectInspectors) { + StandardUnionObjectInspector result = cachedStandardUnionObjectInspector + .get(unionObjectInspectors); + if (result == null) { + result = new StandardUnionObjectInspector(unionObjectInspectors); + cachedStandardUnionObjectInspector.put(unionObjectInspectors, result); + } + return result; + } + static HashMap>, StandardStructObjectInspector> cachedStandardStructObjectInspector = new HashMap>, StandardStructObjectInspector>(); public static StandardStructObjectInspector getStandardStructObjectInspector( Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardUnionObjectInspector.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardUnionObjectInspector.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardUnionObjectInspector.java (revision 0) @@ -0,0 +1,116 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2.objectinspector; + +import java.util.List; + +/** + * StandardUnionObjectInspector works on union data that is stored as + * UnionObject. + * It holds the list of the object inspectors corresponding to each type of the + * object the Union can hold. The UnionObject has a tag followed by the object + * it is holding. + * + * Always use the {@link ObjectInspectorFactory} to create new ObjectInspector + * objects, instead of directly creating an instance of this class. + */ +public class StandardUnionObjectInspector implements UnionObjectInspector { + List ois; + + public StandardUnionObjectInspector(List ois) { + this.ois = ois; + } + + public List getObjectInspectors() { + return ois; + } + + public static class StandardUnion implements UnionObject { + protected byte tag; + protected Object object; + + public StandardUnion() { + } + + public StandardUnion(byte tag, Object object) { + this.tag = tag; + this.object = object; + } + + public void setObject(Object o) { + this.object = o; + } + + public void setTag(byte tag) { + this.tag = tag; + } + + @Override + public Object getObject() { + return object; + } + + @Override + public byte getTag() { + return tag; + } + + @Override + public String toString() { + return tag + ":" + object; + } + } + + /** + * Return the tag of the object. 
+ */ + public byte getTag(Object o) { + if (o == null) { + return -1; + } + return ((UnionObject) o).getTag(); + } + + /** + * Return the field based on the tag value associated with the Object. + */ + public Object getField(Object o) { + if (o == null) { + return null; + } + return ((UnionObject) o).getObject(); + } + + public Category getCategory() { + return Category.UNION; + } + + public String getTypeName() { + return ObjectInspectorUtils.getStandardUnionTypeName(this); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append(getClass().getName()); + sb.append(getTypeName()); + return sb.toString(); + } + +} Index: serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java (revision 1005331) +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspector.java (working copy) @@ -42,7 +42,7 @@ * */ public static enum Category { - PRIMITIVE, LIST, MAP, STRUCT + PRIMITIVE, LIST, MAP, STRUCT, UNION }; /** Index: serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java (revision 1005331) +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoFactory.java (working copy) @@ -97,6 +97,18 @@ return result; } + static HashMap, TypeInfo> cachedUnionTypeInfo = + new HashMap, TypeInfo>(); + + public static TypeInfo getUnionTypeInfo(List typeInfos) { + TypeInfo result = cachedUnionTypeInfo.get(typeInfos); + if (result == null) { + result = new UnionTypeInfo(typeInfos); + cachedUnionTypeInfo.put(typeInfos, result); + } + return result; + } + static HashMap cachedListTypeInfo = new HashMap(); public static TypeInfo getListTypeInfo(TypeInfo elementTypeInfo) { Index: 
serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/UnionTypeInfo.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/UnionTypeInfo.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/UnionTypeInfo.java (revision 0) @@ -0,0 +1,87 @@ +package org.apache.hadoop.hive.serde2.typeinfo; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.serde.Constants; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; + +/** + * UnionTypeInfo represents the TypeInfo of a union. A union holds only one + * field of the specified fields at any point of time. The fields a Union can + * hold can have the same or different TypeInfo. + * + * Always use the TypeInfoFactory to create new TypeInfo objects, instead of + * directly creating an instance of this class. + */ +public class UnionTypeInfo extends TypeInfo implements Serializable { + + private static final long serialVersionUID = 1L; + + private List allUnionObjectTypeInfos; + + /** + * For java serialization use only. + */ + public UnionTypeInfo() { + } + + @Override + public String getTypeName() { + StringBuilder sb = new StringBuilder(); + sb.append(Constants.UNION_TYPE_NAME + "<"); + for (int i = 0; i < allUnionObjectTypeInfos.size(); i++) { + if (i > 0) { + sb.append(","); + } + sb.append(allUnionObjectTypeInfos.get(i).getTypeName()); + } + sb.append(">"); + return sb.toString(); + } + + /** + * For java serialization use only. + */ + public void setAllUnionObjectTypeInfos( + List allUnionObjectTypeInfos) { + this.allUnionObjectTypeInfos = allUnionObjectTypeInfos; + } + + /** + * For TypeInfoFactory use only. 
+ */ + UnionTypeInfo(List typeInfos) { + allUnionObjectTypeInfos = new ArrayList(); + allUnionObjectTypeInfos.addAll(typeInfos); + } + + @Override + public Category getCategory() { + return Category.UNION; + } + + public List getAllUnionObjectTypeInfos() { + return allUnionObjectTypeInfos; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (!(other instanceof UnionTypeInfo)) { + return false; + } + UnionTypeInfo o = (UnionTypeInfo) other; + + // Compare the field types + return o.getAllUnionObjectTypeInfos().equals(getAllUnionObjectTypeInfos()); + } + + @Override + public int hashCode() { + return allUnionObjectTypeInfos.hashCode(); + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfo.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfo.java (revision 1005331) +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfo.java (working copy) @@ -25,11 +25,11 @@ /** * Stores information about a type. Always use the TypeInfoFactory to create new * TypeInfo objects. - * - * We support 4 categories of types: 1. Primitive objects (String, Number, etc) + * + * We support 5 categories of types: 1. Primitive objects (String, Number, etc) * 2. List objects (a list of objects of a single type) 3. Map objects (a map * from objects of one type to objects of another type) 4. Struct objects (a - * list of fields with names and their own types) + * list of fields with names and their own types) 5. Union objects */ public abstract class TypeInfo implements Serializable { @@ -39,8 +39,8 @@ } /** - * The Category of this TypeInfo. Possible values are Primitive, List, Map and - * Struct, which corresponds to the 4 sub-classes of TypeInfo. + * The Category of this TypeInfo. Possible values are Primitive, List, Map, + * Struct and Union, which corresponds to the 5 sub-classes of TypeInfo. 
*/ public abstract Category getCategory(); Index: serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java (revision 1005331) +++ serde/src/java/org/apache/hadoop/hive/serde2/typeinfo/TypeInfoUtils.java (working copy) @@ -19,6 +19,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; @@ -276,6 +277,7 @@ if (!Constants.LIST_TYPE_NAME.equals(t.text) && !Constants.MAP_TYPE_NAME.equals(t.text) && !Constants.STRUCT_TYPE_NAME.equals(t.text) + && !Constants.UNION_TYPE_NAME.equals(t.text) && null == PrimitiveObjectInspectorUtils .getTypeEntryFromTypeName(t.text) && !t.text.equals(alternative)) { @@ -354,7 +356,27 @@ return TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos); } + // Is this a union type? 
+ if (Constants.UNION_TYPE_NAME.equals(t.text)) { + List objectTypeInfos = new ArrayList(); + boolean first = true; + do { + if (first) { + expect("<"); + first = false; + } else { + Token separator = expect(">", ","); + if (separator.text.equals(">")) { + // end of union + break; + } + } + objectTypeInfos.add(parseType()); + } while (true); + return TypeInfoFactory.getUnionTypeInfo(objectTypeInfos); + } + throw new RuntimeException("Internal error parsing position " + t.position + " of '" + typeInfoString + "'"); } @@ -413,6 +435,22 @@ fieldNames, fieldObjectInspectors); break; } + case UNION: { + UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; + List objectTypeInfos = unionTypeInfo + .getAllUnionObjectTypeInfos(); + List fieldObjectInspectors = + new ArrayList(objectTypeInfos.size()); + for (int i = 0; i < objectTypeInfos.size(); i++) { + fieldObjectInspectors + .add(getStandardWritableObjectInspectorFromTypeInfo(objectTypeInfos + .get(i))); + } + result = ObjectInspectorFactory.getStandardUnionObjectInspector( + fieldObjectInspectors); + break; + } + default: { result = null; } @@ -476,7 +514,22 @@ fieldNames, fieldObjectInspectors); break; } - default: { + case UNION: { + UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; + List objectTypeInfos = unionTypeInfo + .getAllUnionObjectTypeInfos(); + List fieldObjectInspectors = + new ArrayList(objectTypeInfos.size()); + for (int i = 0; i < objectTypeInfos.size(); i++) { + fieldObjectInspectors + .add(getStandardJavaObjectInspectorFromTypeInfo(objectTypeInfos + .get(i))); + } + result = ObjectInspectorFactory.getStandardUnionObjectInspector( + fieldObjectInspectors); + break; + } + default: { result = null; } } @@ -532,6 +585,15 @@ result = TypeInfoFactory.getStructTypeInfo(fieldNames, fieldTypeInfos); break; } + case UNION: { + UnionObjectInspector uoi = (UnionObjectInspector) oi; + List objectTypeInfos = new ArrayList(); + for (ObjectInspector eoi : uoi.getObjectInspectors()) { + 
objectTypeInfos.add(getTypeInfoFromObjectInspector(eoi)); + } + result = TypeInfoFactory.getUnionTypeInfo(objectTypeInfos); + break; + } default: { throw new RuntimeException("Unknown ObjectInspector category!"); } Index: serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java (revision 1005331) +++ serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java (working copy) @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; @@ -326,6 +327,20 @@ } break; } + case UNION: { + UnionObjectInspector uoi = (UnionObjectInspector) oi; + if (o == null) { + sb.append("null"); + } else { + sb.append(LBRACE); + sb.append(uoi.getTag(o)); + sb.append(COLON); + buildJSONString(sb, uoi.getField(o), + uoi.getObjectInspectors().get(uoi.getTag(o))); + sb.append(RBRACE); + } + break; + } default: throw new RuntimeException("Unknown type in ObjectInspector!"); } @@ -412,6 +427,19 @@ return false; } } + case UNION: { + UnionObjectInspector uoi = (UnionObjectInspector) oi; + if (o == null) { + return true; + } else { + // there are no elements in the union + if (uoi.getObjectInspectors().size() == 0) { + return false; + } + return hasAnyNullObject(uoi.getField(o), + uoi.getObjectInspectors().get(uoi.getTag(o))); + } + } default: throw new RuntimeException("Unknown type in ObjectInspector!"); } Index: 
serde/src/gen-java/org/apache/hadoop/hive/serde/Constants.java =================================================================== --- serde/src/gen-java/org/apache/hadoop/hive/serde/Constants.java (revision 1005331) +++ serde/src/gen-java/org/apache/hadoop/hive/serde/Constants.java (working copy) @@ -74,6 +74,8 @@ public static final String STRUCT_TYPE_NAME = "struct"; + public static final String UNION_TYPE_NAME = "uniontype"; + public static final String LIST_COLUMNS = "columns"; public static final String LIST_COLUMN_TYPES = "columns.types"; Index: serde/src/gen-cpp/serde_constants.cpp =================================================================== --- serde/src/gen-cpp/serde_constants.cpp (revision 1005331) +++ serde/src/gen-cpp/serde_constants.cpp (working copy) @@ -24,6 +24,8 @@ SERIALIZATION_SORT_ORDER = "serialization.sort.order"; + SERIALIZATION_USE_JSON_OBJECTS = "serialization.use.json.object"; + FIELD_DELIM = "field.delim"; COLLECTION_DELIM = "colelction.delim"; @@ -66,6 +68,8 @@ STRUCT_TYPE_NAME = "struct"; + UNION_TYPE_NAME = "uniontype"; + LIST_COLUMNS = "columns"; LIST_COLUMN_TYPES = "columns.types"; Index: serde/src/gen-cpp/serde_constants.h =================================================================== --- serde/src/gen-cpp/serde_constants.h (revision 1005331) +++ serde/src/gen-cpp/serde_constants.h (working copy) @@ -21,6 +21,7 @@ std::string SERIALIZATION_NULL_FORMAT; std::string SERIALIZATION_LAST_COLUMN_TAKES_REST; std::string SERIALIZATION_SORT_ORDER; + std::string SERIALIZATION_USE_JSON_OBJECTS; std::string FIELD_DELIM; std::string COLLECTION_DELIM; std::string LINE_DELIM; @@ -42,6 +43,7 @@ std::string LIST_TYPE_NAME; std::string MAP_TYPE_NAME; std::string STRUCT_TYPE_NAME; + std::string UNION_TYPE_NAME; std::string LIST_COLUMNS; std::string LIST_COLUMN_TYPES; std::set PrimitiveTypes; Index: serde/src/gen-cpp/serde_types.h =================================================================== --- serde/src/gen-cpp/serde_types.h 
(revision 1005331) +++ serde/src/gen-cpp/serde_types.h (working copy) @@ -7,6 +7,7 @@ #define serde_TYPES_H #include +#include #include #include Index: serde/if/serde.thrift =================================================================== --- serde/if/serde.thrift (revision 1005331) +++ serde/if/serde.thrift (working copy) @@ -40,6 +40,7 @@ const string LIST_TYPE_NAME = "array"; const string MAP_TYPE_NAME = "map"; const string STRUCT_TYPE_NAME = "struct"; +const string UNION_TYPE_NAME = "uniontype"; const string LIST_COLUMNS = "columns"; const string LIST_COLUMN_TYPES = "columns.types"; Index: ql/src/test/results/clientpositive/udf_union.q.out =================================================================== --- ql/src/test/results/clientpositive/udf_union.q.out (revision 0) +++ ql/src/test/results/clientpositive/udf_union.q.out (revision 0) @@ -0,0 +1,73 @@ +PREHOOK: query: DESCRIBE FUNCTION create_union +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION create_union +POSTHOOK: type: DESCFUNCTION +create_union(tag, obj1, obj2, obj3, ...) - Creates a union with the object for given tag +PREHOOK: query: DESCRIBE FUNCTION EXTENDED create_union +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED create_union +POSTHOOK: type: DESCFUNCTION +create_union(tag, obj1, obj2, obj3, ...) 
- Creates a union with the object for given tag +Example: + > SELECT create_union(1, 1, "one") FROM src LIMIT 1; + one +PREHOOK: query: EXPLAIN +SELECT create_union(0, key), create_union(if(key<100, 0, 1), 2.0, value), +create_union(1, "a", struct(2, "b")) +FROM src LIMIT 2 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT create_union(0, key), create_union(if(key<100, 0, 1), 2.0, value), +create_union(1, "a", struct(2, "b")) +FROM src LIMIT 2 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION create_union 0 (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION create_union (TOK_FUNCTION if (< (TOK_TABLE_OR_COL key) 100) 0 1) 2.0 (TOK_TABLE_OR_COL value))) (TOK_SELEXPR (TOK_FUNCTION create_union 1 "a" (TOK_FUNCTION struct 2 "b")))) (TOK_LIMIT 2))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: create_union(0,key) + type: uniontype + expr: create_union(if((key < 100), 0, 1),2.0,value) + type: uniontype + expr: create_union(1,'a',struct(2,'b')) + type: uniontype> + outputColumnNames: _col0, _col1, _col2 + Limit + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: 2 + + +PREHOOK: query: SELECT create_union(0, key), create_union(if(key<100, 0, 1), 2.0, value), +create_union(1, "a", struct(2, "b")) +FROM src LIMIT 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/tmp/amarsri/hive_2010-10-07_21-34-09_385_8152840394893561074/-mr-10000 +POSTHOOK: query: SELECT create_union(0, key), create_union(if(key<100, 0, 1), 2.0, value), +create_union(1, "a", 
struct(2, "b")) +FROM src LIMIT 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-10-07_21-34-09_385_8152840394893561074/-mr-10000 +{0:"238"} {1:"val_238"} {1:{"col1":2,"col2":"b"}} +{0:"86"} {0:2.0} {1:{"col1":2,"col2":"b"}} Index: ql/src/test/results/clientpositive/show_functions.q.out =================================================================== --- ql/src/test/results/clientpositive/show_functions.q.out (revision 1005331) +++ ql/src/test/results/clientpositive/show_functions.q.out (working copy) @@ -44,6 +44,7 @@ count covar_pop covar_samp +create_union date_add date_sub datediff @@ -176,6 +177,7 @@ count covar_pop covar_samp +create_union PREHOOK: query: SHOW FUNCTIONS '.*e$' PREHOOK: type: SHOWFUNCTIONS POSTHOOK: query: SHOW FUNCTIONS '.*e$' Index: ql/src/test/results/clientpositive/create_union_table.q.out =================================================================== --- ql/src/test/results/clientpositive/create_union_table.q.out (revision 0) +++ ql/src/test/results/clientpositive/create_union_table.q.out (revision 0) @@ -0,0 +1,55 @@ +PREHOOK: query: explain create table abc(mydata uniontype,struct>, +strct struct) +PREHOOK: type: CREATETABLE +POSTHOOK: query: explain create table abc(mydata uniontype,struct>, +strct struct) +POSTHOOK: type: CREATETABLE +ABSTRACT SYNTAX TREE: + (TOK_CREATETABLE abc TOK_LIKETABLE (TOK_TABCOLLIST (TOK_TABCOL mydata (TOK_UNIONTYPE (TOK_COLTYPELIST TOK_INT TOK_DOUBLE (TOK_LIST TOK_STRING) (TOK_STRUCT (TOK_TABCOLLIST (TOK_TABCOL a TOK_INT) (TOK_TABCOL b TOK_STRING)))))) (TOK_TABCOL strct (TOK_STRUCT (TOK_TABCOLLIST (TOK_TABCOL a TOK_INT) (TOK_TABCOL b TOK_STRING) (TOK_TABCOL c TOK_STRING)))))) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + Create Table Operator: + Create Table + columns: mydata uniontype,struct>, strct struct + if not exists: false + input format: org.apache.hadoop.mapred.TextInputFormat + # buckets: -1 + output 
format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat + name: abc + isExternal: false + + +PREHOOK: query: create table abc(mydata uniontype,struct>, +strct struct) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table abc(mydata uniontype,struct>, +strct struct) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@abc +PREHOOK: query: load data local inpath '../data/files/union_input.txt' +overwrite into table abc +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/union_input.txt' +overwrite into table abc +POSTHOOK: type: LOAD +POSTHOOK: Output: default@abc +PREHOOK: query: SELECT * FROM abc +PREHOOK: type: QUERY +PREHOOK: Input: default@abc +PREHOOK: Output: file:/tmp/amarsri/hive_2010-09-28_21-39-36_818_1147210259746108693/-mr-10000 +POSTHOOK: query: SELECT * FROM abc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@abc +POSTHOOK: Output: file:/tmp/amarsri/hive_2010-09-28_21-39-36_818_1147210259746108693/-mr-10000 +{0:1} {"a":1,"b":"one","c":"one"} +{1:2.0} {"a":2,"b":"two","c":"two"} +{2:["three","four"]} {"a":3,"b":"three","c":"four"} +{3:{"a":5,"b":"five"}} {"a":5,"b":"five","c":"five"} +{2:["six","seven"]} {"a":6,"b":"six","c":"seven"} +{3:{"a":8,"b":"eight"}} {"a":8,"b":"eight","c":"eight"} +{0:9} {"a":9,"b":"nine","c":"nine"} +{1:10.0} {"a":10,"b":"ten","c":"ten"} Index: ql/src/test/queries/clientpositive/udf_union.q =================================================================== --- ql/src/test/queries/clientpositive/udf_union.q (revision 0) +++ ql/src/test/queries/clientpositive/udf_union.q (revision 0) @@ -0,0 +1,11 @@ +DESCRIBE FUNCTION create_union; +DESCRIBE FUNCTION EXTENDED create_union; + +EXPLAIN +SELECT create_union(0, key), create_union(if(key<100, 0, 1), 2.0, value), +create_union(1, "a", struct(2, "b")) +FROM src LIMIT 2; + +SELECT create_union(0, key), create_union(if(key<100, 0, 1), 2.0, value), +create_union(1, "a", struct(2, "b")) +FROM src LIMIT 2; Index: 
ql/src/test/queries/clientpositive/create_union_table.q =================================================================== --- ql/src/test/queries/clientpositive/create_union_table.q (revision 0) +++ ql/src/test/queries/clientpositive/create_union_table.q (revision 0) @@ -0,0 +1,10 @@ +explain create table abc(mydata uniontype,struct>, +strct struct); + +create table abc(mydata uniontype,struct>, +strct struct); + +load data local inpath '../data/files/union_input.txt' +overwrite into table abc; + +SELECT * FROM abc; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (revision 1005331) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java (working copy) @@ -175,6 +175,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSize; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFSplit; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFStruct; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUnion; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFWhen; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTFExplode; @@ -391,6 +392,7 @@ registerGenericUDF("array", GenericUDFArray.class); registerGenericUDF("map", GenericUDFMap.class); registerGenericUDF("struct", GenericUDFStruct.class); + registerGenericUDF("create_union", GenericUDFUnion.class); registerGenericUDF("case", GenericUDFCase.class); registerGenericUDF("when", GenericUDFWhen.class); Index: ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (revision 1005331) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (working copy) @@ -85,6 +85,8 @@ TOK_LIST; TOK_STRUCT; TOK_MAP; +TOK_UNIONTYPE; +TOK_COLTYPELIST; TOK_CREATEDATABASE; 
TOK_CREATETABLE; TOK_CREATEINDEX; @@ -945,11 +947,18 @@ : type ; +colTypeList +@init { msgs.push("column type list"); } +@after { msgs.pop(); } + : colType (COMMA colType)* -> ^(TOK_COLTYPELIST colType+) + ; + type : primitiveType | listType | structType - | mapType; + | mapType + | unionType; primitiveType @init { msgs.push("primitive type specification"); } @@ -986,6 +995,12 @@ -> ^(TOK_MAP $left $right) ; +unionType +@init { msgs.push("uniontype type"); } +@after { msgs.pop(); } + : KW_UNIONTYPE LESSTHAN colTypeList GREATERTHAN -> ^(TOK_UNIONTYPE colTypeList) + ; + queryOperator @init { msgs.push("query operator"); } @after { msgs.pop(); } @@ -1410,7 +1425,7 @@ @init { msgs.push("function name"); } @after { msgs.pop(); } : // Keyword IF is also a function name - Identifier | KW_IF | KW_ARRAY | KW_MAP | KW_STRUCT + Identifier | KW_IF | KW_ARRAY | KW_MAP | KW_STRUCT | KW_UNIONTYPE ; castExpression @@ -1660,6 +1675,7 @@ | KW_ARRAY | KW_MAP | KW_STRUCT + | KW_UNIONTYPE | EQUAL | NOTEQUAL | LESSTHANOREQUALTO @@ -1769,6 +1785,7 @@ KW_ARRAY: 'ARRAY'; KW_STRUCT: 'STRUCT'; KW_MAP: 'MAP'; +KW_UNIONTYPE: 'UNIONTYPE'; KW_REDUCE: 'REDUCE'; KW_PARTITIONED: 'PARTITIONED'; KW_CLUSTERED: 'CLUSTERED'; Index: ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (revision 1005331) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/BaseSemanticAnalyzer.java (working copy) @@ -516,6 +516,8 @@ + getTypeStringFromAST((ASTNode) typeNode.getChild(1)) + ">"; case HiveParser.TOK_STRUCT: return getStructTypeStringFromAST(typeNode); + case HiveParser.TOK_UNIONTYPE: + return getUnionTypeStringFromAST(typeNode); default: return DDLSemanticAnalyzer.getTypeName(typeNode.getType()); } @@ -542,6 +544,24 @@ return typeStr; } + private static String getUnionTypeStringFromAST(ASTNode typeNode) + throws SemanticException { + String typeStr = 
Constants.UNION_TYPE_NAME + "<"; + typeNode = (ASTNode) typeNode.getChild(0); + int children = typeNode.getChildCount(); + if (children <= 0) { + throw new SemanticException("empty union not allowed."); + } + for (int i = 0; i < children; i++) { + typeStr += getTypeStringFromAST((ASTNode) typeNode.getChild(i)); + if (i < children - 1) { + typeStr += ","; + } + } + typeStr += ">"; + return typeStr; + } + /** * tableSpec. * Index: ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnion.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnion.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUnion.java (revision 0) @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.util.Arrays; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; + +@Description(name = "create_union", value = "_FUNC_(tag, obj1, obj2, obj3, ...)" + + " - Creates a union with the object for given tag", + extended = "Example:\n" + + " > SELECT _FUNC_(1, 1, \"one\") FROM src LIMIT 1;\n" + " one") +public class GenericUDFUnion extends GenericUDF { + Log LOG = LogFactory.getLog("GenericUDFUnion"); + ObjectInspector tagOI; + + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) + throws UDFArgumentException { + tagOI = arguments[0]; + ObjectInspector[] unionOIs = new ObjectInspector[arguments.length-1]; + for (int i = 1; i < arguments.length; i++) { + unionOIs[i-1] = arguments[i]; + } + return ObjectInspectorFactory.getStandardUnionObjectInspector( + Arrays.asList(unionOIs)); + } + + @Override + public Object evaluate(DeferredObject[] arguments) throws HiveException { + byte tag = (byte)((IntObjectInspector)tagOI).get(arguments[0].get()); + return new StandardUnion(tag, arguments[tag + 1].get()); + } + + @Override + public String getDisplayString(String[] children) { + StringBuilder sb = new StringBuilder(); + sb.append("create_union("); + for (int i = 0; i < children.length; i++) { + if (i > 0) { + sb.append(','); + } + sb.append(children[i]); + } + sb.append(')'); + return sb.toString(); + } +}