diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 7ab76f9..c4231de 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -211,6 +211,12 @@ public class HiveConf extends Configuration {
     // not specified
     DROPIGNORESNONEXISTENT("hive.exec.drop.ignorenonexistent", true),
 
+    // By default, enum types are treated as structs, since the generated
+    // classes carry a single int field named "value". However, users often
+    // refer to enum values by their string names. Enabling this option
+    // converts enum values to strings at runtime.
+    CONVERT_ENUM_TO_STRING("hive.data.convert.enum.to.string", false),
+
     // Hadoop Configuration Properties
     // Properties with null values are ignored and exist only for the purpose of giving us
     // a symbolic name to reference in the Hive source code. Properties with non-null
diff --git a/ql/src/test/queries/clientpositive/convert_enum_to_string.q b/ql/src/test/queries/clientpositive/convert_enum_to_string.q
new file mode 100644
index 0000000..f085cb2
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/convert_enum_to_string.q
@@ -0,0 +1,17 @@
+-- Ensure the default behavior displays Enum fields as struct
+-- and that enum-to-string conversion can be enabled/disabled.
+
+create table convert_enum_to_string
+  partitioned by (b string)
+  row format serde "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer"
+  with serdeproperties (
+    "serialization.class"="org.apache.hadoop.hive.serde2.thrift.test.Complex",
+    "serialization.format"="org.apache.thrift.protocol.TBinaryProtocol");
+
+describe convert_enum_to_string;
+
+set hive.data.convert.enum.to.string=true;
+describe convert_enum_to_string;
+
+set hive.data.convert.enum.to.string=false;
+describe convert_enum_to_string;
diff --git a/ql/src/test/results/clientpositive/convert_enum_to_string.q.out b/ql/src/test/results/clientpositive/convert_enum_to_string.q.out
new file mode 100644
index 0000000..e970ac8
--- /dev/null
+++ b/ql/src/test/results/clientpositive/convert_enum_to_string.q.out
@@ -0,0 +1,57 @@
+PREHOOK: query: -- Ensure the default behavior displays Enum fields as struct
+-- and that enum-to-string conversion can be enabled/disabled.
+
+create table convert_enum_to_string
+  partitioned by (b string)
+  row format serde "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer"
+  with serdeproperties (
+    "serialization.class"="org.apache.hadoop.hive.serde2.thrift.test.Complex",
+    "serialization.format"="org.apache.thrift.protocol.TBinaryProtocol")
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Ensure the default behavior displays Enum fields as struct
+-- and that enum-to-string conversion can be enabled/disabled.
+
+create table convert_enum_to_string
+  partitioned by (b string)
+  row format serde "org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer"
+  with serdeproperties (
+    "serialization.class"="org.apache.hadoop.hive.serde2.thrift.test.Complex",
+    "serialization.format"="org.apache.thrift.protocol.TBinaryProtocol")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@convert_enum_to_string
+PREHOOK: query: describe convert_enum_to_string
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe convert_enum_to_string
+POSTHOOK: type: DESCTABLE
+aint	int	from deserializer
+astring	string	from deserializer
+lint	array<int>	from deserializer
+lstring	array<string>	from deserializer
+lintstring	array<struct<myint:int,mystring:string,underscore_int:int>>	from deserializer
+mstringstring	map<string,string>	from deserializer
+myenum	struct<value:int>	from deserializer
+b	string
+PREHOOK: query: describe convert_enum_to_string
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe convert_enum_to_string
+POSTHOOK: type: DESCTABLE
+aint	int	from deserializer
+astring	string	from deserializer
+lint	array<int>	from deserializer
+lstring	array<string>	from deserializer
+lintstring	array<struct<myint:int,mystring:string,underscore_int:int>>	from deserializer
+mstringstring	map<string,string>	from deserializer
+myenum	string	from deserializer
+b	string
+PREHOOK: query: describe convert_enum_to_string
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe convert_enum_to_string
+POSTHOOK: type: DESCTABLE
+aint	int	from deserializer
+astring	string	from deserializer
+lint	array<int>	from deserializer
+lstring	array<string>	from deserializer
+lintstring	array<struct<myint:int,mystring:string,underscore_int:int>>	from deserializer
+mstringstring	map<string,string>	from deserializer
+myenum	struct<value:int>	from deserializer
+b	string
diff --git a/ql/src/test/results/compiler/plan/case_sensitivity.q.xml b/ql/src/test/results/compiler/plan/case_sensitivity.q.xml
index 481b395..9165bf4 100644
--- a/ql/src/test/results/compiler/plan/case_sensitivity.q.xml
+++ b/ql/src/test/results/compiler/plan/case_sensitivity.q.xml
@@ -1089,6 +1089,34 @@
+myenum
+src_thrift
+value
+true
diff --git a/ql/src/test/results/compiler/plan/input5.q.xml b/ql/src/test/results/compiler/plan/input5.q.xml
index f81e242..96f6ea2 100644
--- a/ql/src/test/results/compiler/plan/input5.q.xml
+++ b/ql/src/test/results/compiler/plan/input5.q.xml
@@ -461,21 +461,21 @@
-CNTR_NAME_RS_229_NUM_INPUT_ROWS
+CNTR_NAME_RS_3_NUM_INPUT_ROWS
-CNTR_NAME_RS_229_NUM_OUTPUT_ROWS
+CNTR_NAME_RS_3_NUM_OUTPUT_ROWS
-CNTR_NAME_RS_229_TIME_TAKEN
+CNTR_NAME_RS_3_TIME_TAKEN
-CNTR_NAME_RS_229_FATAL_ERROR
+CNTR_NAME_RS_3_FATAL_ERROR
-RS_229
+RS_3
@@ -636,21 +636,21 @@
-CNTR_NAME_SCR_228_NUM_INPUT_ROWS
+CNTR_NAME_SCR_2_NUM_INPUT_ROWS
-CNTR_NAME_SCR_228_NUM_OUTPUT_ROWS
+CNTR_NAME_SCR_2_NUM_OUTPUT_ROWS
-CNTR_NAME_SCR_228_TIME_TAKEN
+CNTR_NAME_SCR_2_TIME_TAKEN
-CNTR_NAME_SCR_228_FATAL_ERROR
+CNTR_NAME_SCR_2_FATAL_ERROR
-SCR_228
+SCR_2
@@ -768,21 +768,21 @@
-CNTR_NAME_SEL_227_NUM_INPUT_ROWS
+CNTR_NAME_SEL_1_NUM_INPUT_ROWS
-CNTR_NAME_SEL_227_NUM_OUTPUT_ROWS
+CNTR_NAME_SEL_1_NUM_OUTPUT_ROWS
-CNTR_NAME_SEL_227_TIME_TAKEN
+CNTR_NAME_SEL_1_TIME_TAKEN
-CNTR_NAME_SEL_227_FATAL_ERROR
+CNTR_NAME_SEL_1_FATAL_ERROR
-SEL_227
+SEL_1
@@ -836,16 +836,16 @@
-CNTR_NAME_TS_226_NUM_INPUT_ROWS
+CNTR_NAME_TS_0_NUM_INPUT_ROWS
-CNTR_NAME_TS_226_NUM_OUTPUT_ROWS
+CNTR_NAME_TS_0_NUM_OUTPUT_ROWS
-CNTR_NAME_TS_226_TIME_TAKEN
+CNTR_NAME_TS_0_TIME_TAKEN
-CNTR_NAME_TS_226_FATAL_ERROR
+CNTR_NAME_TS_0_FATAL_ERROR
@@ -860,7 +860,7 @@
-TS_226
+TS_0
@@ -957,6 +957,34 @@
+myenum
+src_thrift
+value
+true
@@ -1208,21 +1236,21 @@
-CNTR_NAME_FS_232_NUM_INPUT_ROWS
+CNTR_NAME_FS_6_NUM_INPUT_ROWS
-CNTR_NAME_FS_232_NUM_OUTPUT_ROWS
+CNTR_NAME_FS_6_NUM_OUTPUT_ROWS
-CNTR_NAME_FS_232_TIME_TAKEN
+CNTR_NAME_FS_6_TIME_TAKEN
-CNTR_NAME_FS_232_FATAL_ERROR
+CNTR_NAME_FS_6_FATAL_ERROR
-FS_232
+FS_6
@@ -1328,21 +1356,21 @@
-CNTR_NAME_SEL_231_NUM_INPUT_ROWS
+CNTR_NAME_SEL_5_NUM_INPUT_ROWS
-CNTR_NAME_SEL_231_NUM_OUTPUT_ROWS
+CNTR_NAME_SEL_5_NUM_OUTPUT_ROWS
-CNTR_NAME_SEL_231_TIME_TAKEN
+CNTR_NAME_SEL_5_TIME_TAKEN
-CNTR_NAME_SEL_231_FATAL_ERROR
+CNTR_NAME_SEL_5_FATAL_ERROR
-SEL_231
+SEL_5
@@ -1409,21 +1437,21 @@
-CNTR_NAME_OP_230_NUM_INPUT_ROWS
+CNTR_NAME_OP_4_NUM_INPUT_ROWS
-CNTR_NAME_OP_230_NUM_OUTPUT_ROWS
+CNTR_NAME_OP_4_NUM_OUTPUT_ROWS
-CNTR_NAME_OP_230_TIME_TAKEN
+CNTR_NAME_OP_4_TIME_TAKEN
-CNTR_NAME_OP_230_FATAL_ERROR
+CNTR_NAME_OP_4_FATAL_ERROR
-OP_230
+OP_4
diff --git a/ql/src/test/results/compiler/plan/input_testxpath.q.xml b/ql/src/test/results/compiler/plan/input_testxpath.q.xml
index c10e188..cc77ddb 100644
--- a/ql/src/test/results/compiler/plan/input_testxpath.q.xml
+++ b/ql/src/test/results/compiler/plan/input_testxpath.q.xml
@@ -213,21 +213,21 @@
-CNTR_NAME_FS_303_NUM_INPUT_ROWS
+CNTR_NAME_FS_2_NUM_INPUT_ROWS
-CNTR_NAME_FS_303_NUM_OUTPUT_ROWS
+CNTR_NAME_FS_2_NUM_OUTPUT_ROWS
-CNTR_NAME_FS_303_TIME_TAKEN
+CNTR_NAME_FS_2_TIME_TAKEN
-CNTR_NAME_FS_303_FATAL_ERROR
+CNTR_NAME_FS_2_FATAL_ERROR
-FS_303
+FS_2
@@ -501,21 +501,21 @@
-CNTR_NAME_SEL_302_NUM_INPUT_ROWS
+CNTR_NAME_SEL_1_NUM_INPUT_ROWS
-CNTR_NAME_SEL_302_NUM_OUTPUT_ROWS
+CNTR_NAME_SEL_1_NUM_OUTPUT_ROWS
-CNTR_NAME_SEL_302_TIME_TAKEN
+CNTR_NAME_SEL_1_TIME_TAKEN
-CNTR_NAME_SEL_302_FATAL_ERROR
+CNTR_NAME_SEL_1_FATAL_ERROR
-SEL_302
+SEL_1
@@ -588,16 +588,16 @@
-CNTR_NAME_TS_301_NUM_INPUT_ROWS
+CNTR_NAME_TS_0_NUM_INPUT_ROWS
-CNTR_NAME_TS_301_NUM_OUTPUT_ROWS
+CNTR_NAME_TS_0_NUM_OUTPUT_ROWS
-CNTR_NAME_TS_301_TIME_TAKEN
+CNTR_NAME_TS_0_TIME_TAKEN
-CNTR_NAME_TS_301_FATAL_ERROR
+CNTR_NAME_TS_0_FATAL_ERROR
@@ -615,7 +615,7 @@
-TS_301
+TS_0
@@ -705,6 +705,34 @@
+myenum
+src_thrift
+value
+true
diff --git a/ql/src/test/results/compiler/plan/input_testxpath2.q.xml b/ql/src/test/results/compiler/plan/input_testxpath2.q.xml
index 85f6b96..199929f 100644
--- a/ql/src/test/results/compiler/plan/input_testxpath2.q.xml
+++ b/ql/src/test/results/compiler/plan/input_testxpath2.q.xml
@@ -217,21 +217,21 @@
-CNTR_NAME_FS_310_NUM_INPUT_ROWS
+CNTR_NAME_FS_3_NUM_INPUT_ROWS
-CNTR_NAME_FS_310_NUM_OUTPUT_ROWS
+CNTR_NAME_FS_3_NUM_OUTPUT_ROWS
-CNTR_NAME_FS_310_TIME_TAKEN
+CNTR_NAME_FS_3_TIME_TAKEN
-CNTR_NAME_FS_310_FATAL_ERROR
+CNTR_NAME_FS_3_FATAL_ERROR
-FS_310
+FS_3
@@ -462,21 +462,21 @@
-CNTR_NAME_SEL_309_NUM_INPUT_ROWS
+CNTR_NAME_SEL_2_NUM_INPUT_ROWS
-CNTR_NAME_SEL_309_NUM_OUTPUT_ROWS
+CNTR_NAME_SEL_2_NUM_OUTPUT_ROWS
-CNTR_NAME_SEL_309_TIME_TAKEN
+CNTR_NAME_SEL_2_TIME_TAKEN
-CNTR_NAME_SEL_309_FATAL_ERROR
+CNTR_NAME_SEL_2_FATAL_ERROR
-SEL_309
+SEL_2
@@ -629,21 +629,21 @@
-CNTR_NAME_FIL_311_NUM_INPUT_ROWS
+CNTR_NAME_FIL_4_NUM_INPUT_ROWS
-CNTR_NAME_FIL_311_NUM_OUTPUT_ROWS
+CNTR_NAME_FIL_4_NUM_OUTPUT_ROWS
-CNTR_NAME_FIL_311_TIME_TAKEN
+CNTR_NAME_FIL_4_TIME_TAKEN
-CNTR_NAME_FIL_311_FATAL_ERROR
+CNTR_NAME_FIL_4_FATAL_ERROR
-FIL_311
+FIL_4
@@ -740,6 +740,34 @@
+myenum
+src_thrift
+value
+true
@@ -795,16 +823,16 @@
-CNTR_NAME_TS_307_NUM_INPUT_ROWS
+CNTR_NAME_TS_0_NUM_INPUT_ROWS
-CNTR_NAME_TS_307_NUM_OUTPUT_ROWS
+CNTR_NAME_TS_0_NUM_OUTPUT_ROWS
-CNTR_NAME_TS_307_TIME_TAKEN
+CNTR_NAME_TS_0_TIME_TAKEN
-CNTR_NAME_TS_307_FATAL_ERROR
+CNTR_NAME_TS_0_FATAL_ERROR
@@ -822,7 +850,7 @@
-TS_307
+TS_0
diff --git a/serde/if/test/complex.thrift b/serde/if/test/complex.thrift
index 308b64c..d4d0317 100644
--- a/serde/if/test/complex.thrift
+++ b/serde/if/test/complex.thrift
@@ -18,6 +18,11 @@
 namespace java org.apache.hadoop.hive.serde2.thrift.test
 
+enum MyEnum {
+  ALPACA = 1,
+  LLAMA = 2
+}
+
 struct IntString {
   1: i32 myint;
   2: string myString;
@@ -31,4 +36,5 @@ struct Complex {
   4: list<string> lString;
   5: list<IntString> lintString;
   6: map<string,string> mStringString;
+  7: MyEnum myEnum;
 }
diff --git a/serde/ivy.xml b/serde/ivy.xml
index ab4ac30..060e71c 100644
--- a/serde/ivy.xml
+++ b/serde/ivy.xml
@@ -39,6 +39,7 @@
+
 lString; // required
 private List lintString; // required
 private Map mStringString; // required
+  private MyEnum myEnum; // required
 
   /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */
   public enum _Fields implements org.apache.thrift.TFieldIdEnum {
@@ -44,7 +46,12 @@ public class Complex implements org.apache.thrift.TBase
 byName = new HashMap();
@@ -71,6 +78,8 @@ public class Complex implements org.apache.thrift.TBase
 lint, List lString, List lintString,
-    Map mStringString)
+    Map mStringString,
+    MyEnum myEnum)
   {
     this();
     this.aint = aint;
@@ -157,6 +169,7 @@ public class Complex implements org.apache.thrift.TBase
 objectInspectorCache = new HashMap();
+  private static Properties objectInspectionProperties = new Properties();
+
+  public static void setObjectInspectionProperty(String key, String value) {
+    objectInspectionProperties.setProperty(key, value);
+  }
 
   public static ObjectInspector getReflectionObjectInspector(Type t,
       ObjectInspectorOptions options) {
@@ -155,6 +162,15 @@ public final class ObjectInspectorFactory {
         .getTypeEntryFromPrimitiveWritableClass(c).primitiveCategory);
     }
 
+    // Enum class?
+    if (Boolean.parseBoolean(objectInspectionProperties.getProperty(
+        HiveConf.ConfVars.CONVERT_ENUM_TO_STRING.toString(),
+        HiveConf.ConfVars.CONVERT_ENUM_TO_STRING.defaultVal)) &&
+        Enum.class.isAssignableFrom(c)) {
+      return PrimitiveObjectInspectorFactory
+          .getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
+    }
+
     // Must be struct because List and Map need to be ParameterizedType
     assert (!List.class.isAssignableFrom(c));
     assert (!Map.class.isAssignableFrom(c));
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaStringObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaStringObjectInspector.java
index 921ce2b..b496ca5 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaStringObjectInspector.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/JavaStringObjectInspector.java
@@ -32,12 +32,28 @@ public class JavaStringObjectInspector extends
 
   @Override
   public Text getPrimitiveWritableObject(Object o) {
-    return o == null ? null : new Text(((String) o));
+    if (o == null) {
+      return null;
+    }
+
+    if (Enum.class.isAssignableFrom(o.getClass())) {
+      return new Text(o.toString());
+    } else {
+      return new Text((String) o);
+    }
   }
 
   @Override
   public String getPrimitiveJavaObject(Object o) {
-    return (String) o;
+    if (o == null) {
+      return null;
+    }
+
+    if (Enum.class.isAssignableFrom(o.getClass())) {
+      return o.toString();
+    } else {
+      return (String) o;
+    }
   }
 
   @Override
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftByteStreamTypedSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftByteStreamTypedSerDe.java
index d98c5fb..13a4776 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftByteStreamTypedSerDe.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftByteStreamTypedSerDe.java
@@ -22,8 +22,10 @@ import java.lang.reflect.Type;
 import java.util.Properties;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.serde2.ByteStreamTypedSerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.io.Writable;
 import org.apache.thrift.TBase;
@@ -72,6 +74,12 @@ public class ThriftByteStreamTypedSerDe extends ByteStreamTypedSerDe {
   }
 
   @Override
+  public ObjectInspector getObjectInspector() throws SerDeException {
+    return ObjectInspectorFactory.getReflectionObjectInspector(objectType,
+        getObjectInspectorOptions());
+  }
+
+  @Override
   public Object deserialize(Writable field) throws SerDeException {
     Object obj = super.deserialize(field);
     try {
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java
index e5696ab..09f73d4 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/thrift/ThriftDeserializer.java
@@ -21,10 +21,12 @@ package org.apache.hadoop.hive.serde2.thrift;
 import java.util.Properties;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.io.Writable;
 import org.apache.thrift.protocol.TProtocolFactory;
@@ -61,6 +63,11 @@ public class ThriftDeserializer implements Deserializer {
 
       TProtocolFactory tp = TReflectionUtils
           .getProtocolFactoryByName(protoName);
+
+      ObjectInspectorFactory.setObjectInspectionProperty(HiveConf.ConfVars.CONVERT_ENUM_TO_STRING.toString(),
+          job.get(HiveConf.ConfVars.CONVERT_ENUM_TO_STRING.toString(),
+              HiveConf.ConfVars.CONVERT_ENUM_TO_STRING.defaultVal));
+
       tsd = new ThriftByteStreamTypedSerDe(recordClass, tp, tp);
 
     } catch (Exception e) {
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java
index a18f4a7..037165d 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestObjectInspectorUtils.java
@@ -21,6 +21,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 
+import com.google.common.collect.Lists;
 import junit.framework.TestCase;
 
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
@@ -46,7 +47,7 @@ public class TestObjectInspectorUtils extends TestCase {
       StructObjectInspector soi = (StructObjectInspector) ObjectInspectorUtils
           .getStandardObjectInspector(oi1);
       List fields = soi.getAllStructFieldRefs();
-      assertEquals(6, fields.size());
+      assertEquals(7, fields.size());
 
       assertEquals(fields.get(0), soi.getStructFieldRef("aint"));
 
       // null
@@ -65,6 +66,7 @@ public class TestObjectInspectorUtils extends TestCase {
       List c4 = new ArrayList();
       cc.setLintString(c4);
       cc.setMStringString(null);
+      cc.setMyEnum(null);
 
       // standard object
       Object c = ObjectInspectorUtils.copyToStandardObject(cc, oi1);
@@ -74,8 +76,9 @@ public class TestObjectInspectorUtils extends TestCase {
       assertEquals(c3, soi.getStructFieldData(c, fields.get(3)));
       assertEquals(c4, soi.getStructFieldData(c, fields.get(4)));
       assertNull(soi.getStructFieldData(c, fields.get(5)));
+      assertNull(soi.getStructFieldData(c, fields.get(6)));
       ArrayList cfields = new ArrayList();
-      for (int i = 0; i < 6; i++) {
+      for (int i = 0; i < 7; i++) {
         cfields.add(soi.getStructFieldData(c, fields.get(i)));
       }
       assertEquals(cfields, soi.getStructFieldsDataAsList(c));
@@ -103,6 +106,9 @@ public class TestObjectInspectorUtils extends TestCase {
           PrimitiveObjectInspectorFactory.javaStringObjectInspector,
           PrimitiveObjectInspectorFactory.javaStringObjectInspector), fields
           .get(5).getFieldObjectInspector());
+      assertEquals(ObjectInspectorFactory.getStandardStructObjectInspector(
+          Lists.newArrayList("value"), new ArrayList()),
+          fields.get(6).getFieldObjectInspector());
     } catch (Throwable e) {
       e.printStackTrace();
       throw e;
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java
index 5f692fb..31411c9 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestThriftObjectInspectors.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.thrift.test.Complex;
 import org.apache.hadoop.hive.serde2.thrift.test.IntString;
+import org.apache.hadoop.hive.serde2.thrift.test.MyEnum;
 
 /**
  * TestThriftObjectInspectors.
@@ -49,7 +50,7 @@ public class TestThriftObjectInspectors extends TestCase {
       assertEquals(Category.STRUCT, oi1.getCategory());
       StructObjectInspector soi = (StructObjectInspector) oi1;
       List fields = soi.getAllStructFieldRefs();
-      assertEquals(6, fields.size());
+      assertEquals(7, fields.size());
 
       assertEquals(fields.get(0), soi.getStructFieldRef("aint"));
 
       // null
@@ -68,6 +69,7 @@ public class TestThriftObjectInspectors extends TestCase {
       List c4 = new ArrayList();
       c.setLintString(c4);
       c.setMStringString(null);
+      c.setMyEnum(MyEnum.ALPACA);
 
       assertEquals(1, soi.getStructFieldData(c, fields.get(0)));
       assertEquals("test", soi.getStructFieldData(c, fields.get(1)));
@@ -75,8 +77,9 @@ public class TestThriftObjectInspectors extends TestCase {
       assertEquals(c3, soi.getStructFieldData(c, fields.get(3)));
       assertEquals(c4, soi.getStructFieldData(c, fields.get(4)));
       assertNull(soi.getStructFieldData(c, fields.get(5)));
+      assertEquals(MyEnum.ALPACA, soi.getStructFieldData(c, fields.get(6)));
       ArrayList cfields = new ArrayList();
-      for (int i = 0; i < 6; i++) {
+      for (int i = 0; i < 7; i++) {
         cfields.add(soi.getStructFieldData(c, fields.get(i)));
       }
       assertEquals(cfields, soi.getStructFieldsDataAsList(c));
@@ -104,6 +107,8 @@ public class TestThriftObjectInspectors extends TestCase {
           PrimitiveObjectInspectorFactory.javaStringObjectInspector,
           PrimitiveObjectInspectorFactory.javaStringObjectInspector), fields
           .get(5).getFieldObjectInspector());
+      ObjectInspectorFactory.getReflectionObjectInspector(
+          Complex.class, ObjectInspectorFactory.ObjectInspectorOptions.THRIFT);
     } catch (Throwable e) {
       e.printStackTrace();
       throw e;
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java b/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java
index 8aef773..7b76fa2 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/thrift_test/CreateSequenceFile.java
@@ -1,145 +1,147 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.serde2.thrift_test;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Random;
-
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.serde2.ByteStream;
-import org.apache.hadoop.hive.serde2.thrift.test.Complex;
-import org.apache.hadoop.hive.serde2.thrift.test.IntString;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.thrift.TBase;
-import org.apache.thrift.TException;
-import org.apache.thrift.protocol.TBinaryProtocol;
-import org.apache.thrift.protocol.TProtocol;
-import org.apache.thrift.protocol.TProtocolFactory;
-import org.apache.thrift.transport.TIOStreamTransport;
-
-/**
- * CreateSequenceFile.
- *
- */
-public final class CreateSequenceFile {
-
-  private CreateSequenceFile() {
-    // prevent instantiation
-  }
-
-  public static void usage() {
-    System.out.println("Usage: CreateSequenceFile ");
-    System.exit(1);
-  }
-
-  /**
-   * ThriftSerializer.
-   *
-   */
-  public static class ThriftSerializer {
-
-    private ByteStream.Output bos;
-    private TProtocol outProtocol;
-
-    public ThriftSerializer() {
-      bos = new ByteStream.Output();
-      TIOStreamTransport outTransport = new TIOStreamTransport(bos);
-      TProtocolFactory outFactory = new TBinaryProtocol.Factory();
-      outProtocol = outFactory.getProtocol(outTransport);
-    }
-
-    private BytesWritable bw = new BytesWritable();
-
-    public BytesWritable serialize(TBase base) throws TException {
-      bos.reset();
-      base.write(outProtocol);
-      bw.set(bos.getData(), 0, bos.getCount());
-      return bw;
-    }
-  }
-
-  public static void main(String[] args) throws Exception {
-
-    // Read parameters
-    int lines = 10;
-    List extraArgs = new ArrayList();
-    for (int ai = 0; ai < args.length; ai++) {
-      if (args[ai].equals("-line") && ai + 1 < args.length) {
-        lines = Integer.parseInt(args[ai + 1]);
-        ai++;
-      } else {
-        extraArgs.add(args[ai]);
-      }
-    }
-    if (extraArgs.size() != 1) {
-      usage();
-    }
-
-    JobConf conf = new JobConf(CreateSequenceFile.class);
-
-    ThriftSerializer serializer = new ThriftSerializer();
-
-    // Open files
-    SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf),
-        conf, new Path(extraArgs.get(0)), BytesWritable.class,
-        BytesWritable.class);
-
-    // write to file
-    BytesWritable key = new BytesWritable();
-
-    Random rand = new Random(20081215);
-
-    for (int i = 0; i < lines; i++) {
-
-      ArrayList alist = new ArrayList();
-      alist.add(i);
-      alist.add(i * 2);
-      alist.add(i * 3);
-      ArrayList slist = new ArrayList();
-      slist.add("" + i * 10);
-      slist.add("" + i * 100);
-      slist.add("" + i * 1000);
-      ArrayList islist = new ArrayList();
-      islist.add(new IntString(i * i, "" + i * i * i, i));
-      HashMap hash = new HashMap();
-      hash.put("key_" + i, "value_" + i);
-
-      Complex complex = new Complex(rand.nextInt(), "record_"
-          + (new Integer(i)).toString(), alist, slist, islist, hash);
-
-      Writable value = serializer.serialize(complex);
-      writer.append(key, value);
-    }
-
-    // Add an all-null record
-    Complex complex = new Complex(0, null, null, null, null, null);
-    Writable value = serializer.serialize(complex);
-    writer.append(key, value);
-
-    // Close files
-    writer.close();
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.thrift_test;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.thrift.test.Complex;
+import org.apache.hadoop.hive.serde2.thrift.test.IntString;
+import org.apache.hadoop.hive.serde2.thrift.test.MyEnum;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.thrift.TBase;
+import org.apache.thrift.TException;
+import org.apache.thrift.protocol.TBinaryProtocol;
+import org.apache.thrift.protocol.TProtocol;
+import org.apache.thrift.protocol.TProtocolFactory;
+import org.apache.thrift.transport.TIOStreamTransport;
+
+/**
+ * CreateSequenceFile.
+ *
+ */
+public final class CreateSequenceFile {
+
+  private CreateSequenceFile() {
+    // prevent instantiation
+  }
+
+  public static void usage() {
+    System.out.println("Usage: CreateSequenceFile ");
+    System.exit(1);
+  }
+
+  /**
+   * ThriftSerializer.
+   *
+   */
+  public static class ThriftSerializer {
+
+    private ByteStream.Output bos;
+    private TProtocol outProtocol;
+
+    public ThriftSerializer() {
+      bos = new ByteStream.Output();
+      TIOStreamTransport outTransport = new TIOStreamTransport(bos);
+      TProtocolFactory outFactory = new TBinaryProtocol.Factory();
+      outProtocol = outFactory.getProtocol(outTransport);
+    }
+
+    private BytesWritable bw = new BytesWritable();
+
+    public BytesWritable serialize(TBase base) throws TException {
+      bos.reset();
+      base.write(outProtocol);
+      bw.set(bos.getData(), 0, bos.getCount());
+      return bw;
+    }
+  }
+
+  public static void main(String[] args) throws Exception {
+
+    // Read parameters
+    int lines = 10;
+    List extraArgs = new ArrayList();
+    for (int ai = 0; ai < args.length; ai++) {
+      if (args[ai].equals("-line") && ai + 1 < args.length) {
+        lines = Integer.parseInt(args[ai + 1]);
+        ai++;
+      } else {
+        extraArgs.add(args[ai]);
+      }
+    }
+    if (extraArgs.size() != 1) {
+      usage();
+    }
+
+    JobConf conf = new JobConf(CreateSequenceFile.class);
+
+    ThriftSerializer serializer = new ThriftSerializer();
+
+    // Open files
+    SequenceFile.Writer writer = new SequenceFile.Writer(FileSystem.get(conf),
+        conf, new Path(extraArgs.get(0)), BytesWritable.class,
+        BytesWritable.class);
+
+    // write to file
+    BytesWritable key = new BytesWritable();
+
+    Random rand = new Random(20081215);
+
+    for (int i = 0; i < lines; i++) {
+
+      ArrayList alist = new ArrayList();
+      alist.add(i);
+      alist.add(i * 2);
+      alist.add(i * 3);
+      ArrayList slist = new ArrayList();
+      slist.add("" + i * 10);
+      slist.add("" + i * 100);
+      slist.add("" + i * 1000);
+      ArrayList islist = new ArrayList();
+      islist.add(new IntString(i * i, "" + i * i * i, i));
+      HashMap hash = new HashMap();
+      hash.put("key_" + i, "value_" + i);
+
+      Complex complex = new Complex(rand.nextInt(), "record_"
+          + (new Integer(i)).toString(), alist, slist, islist, hash,
+          MyEnum.findByValue(rand.nextInt(MyEnum.values().length) + 1));
+
+      Writable value = serializer.serialize(complex);
+      writer.append(key, value);
+    }
+
+    // Add an all-null record
+    Complex complex = new Complex(0, null, null, null, null, null, null);
+    Writable value = serializer.serialize(complex);
+    writer.append(key, value);
+
+    // Close files
+    writer.close();
+  }
+
+}
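
For anyone reviewing the behavior outside of a query, here is a minimal sketch of the reflection path (not part of the patch; the class name EnumToStringSketch is made up, and it assumes the patched serde jar plus the generated test classes from complex.thrift are on the classpath). With the property set, the reflection factory maps an Enum class to a string inspector, and the patched JavaStringObjectInspector accepts the enum constant itself:

// Sketch only, not part of the patch.
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.hive.serde2.thrift.test.MyEnum;

public class EnumToStringSketch {
  public static void main(String[] args) {
    // Mirror what ThriftDeserializer.initialize() now does with the job conf.
    ObjectInspectorFactory.setObjectInspectionProperty(
        HiveConf.ConfVars.CONVERT_ENUM_TO_STRING.toString(), "true");
    // With the flag on, the new branch returns a string inspector for enums.
    ObjectInspector oi = ObjectInspectorFactory.getReflectionObjectInspector(
        MyEnum.class, ObjectInspectorFactory.ObjectInspectorOptions.THRIFT);
    // The patched inspector converts the enum constant to its name.
    System.out.println(((StringObjectInspector) oi)
        .getPrimitiveJavaObject(MyEnum.ALPACA)); // expected: ALPACA
  }
}

In a Hive session the same switch is what "set hive.data.convert.enum.to.string=true;" flips before a describe or select runs.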
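
And a sketch of the conf-to-describe path that the ThriftDeserializer change wires up (again illustrative, not from the patch; the class name ThriftEnumFieldCheck is hypothetical). Per the updated convert_enum_to_string.q.out, the myenum field should describe as string with the flag on and as struct<value:int> with it off:

// Sketch only, not part of the patch.
import java.util.Properties;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.thrift.ThriftDeserializer;
import org.apache.hadoop.mapred.JobConf;

public class ThriftEnumFieldCheck {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf();
    job.set(HiveConf.ConfVars.CONVERT_ENUM_TO_STRING.toString(), "true");

    // Same table properties as the new convert_enum_to_string.q test.
    Properties tbl = new Properties();
    tbl.setProperty("serialization.class",
        "org.apache.hadoop.hive.serde2.thrift.test.Complex");
    tbl.setProperty("serialization.format",
        "org.apache.thrift.protocol.TBinaryProtocol");

    ThriftDeserializer tsd = new ThriftDeserializer();
    tsd.initialize(job, tbl);

    StructObjectInspector soi = (StructObjectInspector) tsd.getObjectInspector();
    // Prints "string" with the flag on; "struct<value:int>" with it off.
    System.out.println(soi.getStructFieldRef("myenum")
        .getFieldObjectInspector().getTypeName());
  }
}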