diff --git ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java index fac0cbb..db5f5e2 100644 --- ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java +++ ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java @@ -9913,6 +9913,10 @@ public Type parsePartialFrom( * VARCHAR = 16; */ VARCHAR(16, 16), + /** + * CHAR = 17; + */ + CHAR(17, 17), ; /** @@ -9983,6 +9987,10 @@ public Type parsePartialFrom( * VARCHAR = 16; */ public static final int VARCHAR_VALUE = 16; + /** + * CHAR = 17; + */ + public static final int CHAR_VALUE = 17; public final int getNumber() { return value; } @@ -10006,6 +10014,7 @@ public static Kind valueOf(int value) { case 14: return DECIMAL; case 15: return DATE; case 16: return VARCHAR; + case 17: return CHAR; default: return null; } } @@ -16767,40 +16776,40 @@ public Builder setMagicBytes( "9\n\007streams\030\001 \003(\0132(.org.apache.hadoop.hiv" + "e.ql.io.orc.Stream\022A\n\007columns\030\002 \003(\01320.or" + "g.apache.hadoop.hive.ql.io.orc.ColumnEnc" + - "oding\"\356\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apach" + + "oding\"\370\002\n\004Type\0229\n\004kind\030\001 \002(\0162+.org.apach" + "e.hadoop.hive.ql.io.orc.Type.Kind\022\024\n\010sub" + "types\030\002 \003(\rB\002\020\001\022\022\n\nfieldNames\030\003 \003(\t\022\025\n\rm" + "aximumLength\030\004 \001(\r\022\021\n\tprecision\030\005 \001(\r\022\r\n" + - "\005scale\030\006 \001(\r\"\307\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004BY" + + "\005scale\030\006 \001(\r\"\321\001\n\004Kind\022\013\n\007BOOLEAN\020\000\022\010\n\004BY" + "TE\020\001\022\t\n\005SHORT\020\002\022\007\n\003INT\020\003\022\010\n\004LONG\020\004\022\t\n\005FL", "OAT\020\005\022\n\n\006DOUBLE\020\006\022\n\n\006STRING\020\007\022\n\n\006BINARY\020" + "\010\022\r\n\tTIMESTAMP\020\t\022\010\n\004LIST\020\n\022\007\n\003MAP\020\013\022\n\n\006S" + "TRUCT\020\014\022\t\n\005UNION\020\r\022\013\n\007DECIMAL\020\016\022\010\n\004DATE\020" + - "\017\022\013\n\007VARCHAR\020\020\"x\n\021StripeInformation\022\016\n\006o" + - "ffset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001(\004\022\022\n\ndata" + - "Length\030\003 \001(\004\022\024\n\014footerLength\030\004 \001(\004\022\024\n\014nu" + - "mberOfRows\030\005 \001(\004\"/\n\020UserMetadataItem\022\014\n\004" + - "name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"X\n\020StripeStati" + - "stics\022D\n\010colStats\030\001 \003(\01322.org.apache.had" + - "oop.hive.ql.io.orc.ColumnStatistics\"S\n\010M", - "etadata\022G\n\013stripeStats\030\001 \003(\01322.org.apach" + - "e.hadoop.hive.ql.io.orc.StripeStatistics" + - "\"\356\002\n\006Footer\022\024\n\014headerLength\030\001 \001(\004\022\025\n\rcon" + - "tentLength\030\002 \001(\004\022D\n\007stripes\030\003 \003(\01323.org." + - "apache.hadoop.hive.ql.io.orc.StripeInfor" + - "mation\0225\n\005types\030\004 \003(\0132&.org.apache.hadoo" + - "p.hive.ql.io.orc.Type\022D\n\010metadata\030\005 \003(\0132" + - "2.org.apache.hadoop.hive.ql.io.orc.UserM" + - "etadataItem\022\024\n\014numberOfRows\030\006 \001(\004\022F\n\nsta" + - "tistics\030\007 \003(\01322.org.apache.hadoop.hive.q", - "l.io.orc.ColumnStatistics\022\026\n\016rowIndexStr" + - "ide\030\010 \001(\r\"\305\001\n\nPostScript\022\024\n\014footerLength" + - "\030\001 \001(\004\022F\n\013compression\030\002 \001(\01621.org.apache" + - ".hadoop.hive.ql.io.orc.CompressionKind\022\034" + - "\n\024compressionBlockSize\030\003 \001(\004\022\023\n\007version\030" + - "\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 \001(\004\022\016\n\005mag" + - "ic\030\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010" + - "\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003" + "\017\022\013\n\007VARCHAR\020\020\022\010\n\004CHAR\020\021\"x\n\021StripeInform" + + "ation\022\016\n\006offset\030\001 \001(\004\022\023\n\013indexLength\030\002 \001" + + "(\004\022\022\n\ndataLength\030\003 \001(\004\022\024\n\014footerLength\030\004" + + " \001(\004\022\024\n\014numberOfRows\030\005 \001(\004\"/\n\020UserMetada" + + "taItem\022\014\n\004name\030\001 \002(\t\022\r\n\005value\030\002 \002(\014\"X\n\020S" + + "tripeStatistics\022D\n\010colStats\030\001 \003(\01322.org." + + "apache.hadoop.hive.ql.io.orc.ColumnStati", + "stics\"S\n\010Metadata\022G\n\013stripeStats\030\001 \003(\01322" + + ".org.apache.hadoop.hive.ql.io.orc.Stripe" + + "Statistics\"\356\002\n\006Footer\022\024\n\014headerLength\030\001 " + + "\001(\004\022\025\n\rcontentLength\030\002 \001(\004\022D\n\007stripes\030\003 " + + "\003(\01323.org.apache.hadoop.hive.ql.io.orc.S" + + "tripeInformation\0225\n\005types\030\004 \003(\0132&.org.ap" + + "ache.hadoop.hive.ql.io.orc.Type\022D\n\010metad" + + "ata\030\005 \003(\01322.org.apache.hadoop.hive.ql.io" + + ".orc.UserMetadataItem\022\024\n\014numberOfRows\030\006 " + + "\001(\004\022F\n\nstatistics\030\007 \003(\01322.org.apache.had", + "oop.hive.ql.io.orc.ColumnStatistics\022\026\n\016r" + + "owIndexStride\030\010 \001(\r\"\305\001\n\nPostScript\022\024\n\014fo" + + "oterLength\030\001 \001(\004\022F\n\013compression\030\002 \001(\01621." + + "org.apache.hadoop.hive.ql.io.orc.Compres" + + "sionKind\022\034\n\024compressionBlockSize\030\003 \001(\004\022\023" + + "\n\007version\030\004 \003(\rB\002\020\001\022\026\n\016metadataLength\030\005 " + + "\001(\004\022\016\n\005magic\030\300> \001(\t*:\n\017CompressionKind\022\010" + + "\n\004NONE\020\000\022\010\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java index e257887..409de7c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ColumnStatisticsImpl.java @@ -789,6 +789,7 @@ static ColumnStatisticsImpl create(ObjectInspector inspector) { case DOUBLE: return new DoubleStatisticsImpl(); case STRING: + case CHAR: case VARCHAR: return new StringStatisticsImpl(); case DECIMAL: diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java index 7195af0..b81ca46 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java @@ -483,6 +483,9 @@ static ObjectInspector createObjectInspector(TypeInfo info) { return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; case STRING: return PrimitiveObjectInspectorFactory.writableStringObjectInspector; + case CHAR: + return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + (PrimitiveTypeInfo) info); case VARCHAR: return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( (PrimitiveTypeInfo) info); @@ -533,6 +536,13 @@ static ObjectInspector createObjectInspector(int columnId, return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector; case STRING: return PrimitiveObjectInspectorFactory.writableStringObjectInspector; + case CHAR: + if (!type.hasMaximumLength()) { + throw new UnsupportedOperationException( + "Illegal use of char type without length in ORC type definition."); + } + return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + TypeInfoFactory.getCharTypeInfo(type.getMaximumLength())); case VARCHAR: if (!type.hasMaximumLength()) { throw new UnsupportedOperationException( diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java index d537f3f..173fb8d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; @@ -1471,6 +1472,34 @@ void skipRows(long items) throws IOException { } } + private static class CharTreeReader extends StringTreeReader { + int maxLength; + + CharTreeReader(Path path, int columnId, int maxLength) { + super(path, columnId); + this.maxLength = maxLength; + } + + @Override + Object next(Object previous) throws IOException { + HiveCharWritable result = null; + if (previous == null) { + result = new HiveCharWritable(); + } else { + result = (HiveCharWritable) previous; + } + // Use the string reader implementation to populate the internal Text value + Object textVal = super.next(result.getTextValue()); + if (textVal == null) { + return null; + } + // result should now hold the value that was read in. + // enforce char length + result.enforceMaxLength(maxLength); + return result; + } + } + private static class VarcharTreeReader extends StringTreeReader { int maxLength; @@ -1890,6 +1919,11 @@ private static TreeReader createTreeReader(Path path, return new LongTreeReader(path, columnId); case STRING: return new StringTreeReader(path, columnId); + case CHAR: + if (!type.hasMaximumLength()) { + throw new IllegalArgumentException("ORC char type has no length specified"); + } + return new CharTreeReader(path, columnId, type.getMaximumLength()); case VARCHAR: if (!type.hasMaximumLength()) { throw new IllegalArgumentException("ORC varchar type has no length specified"); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java index 4658049..bc26b12 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java @@ -53,6 +53,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.DateObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveCharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; @@ -60,6 +61,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.BytesWritable; @@ -1053,6 +1055,28 @@ long estimateMemory() { } /** + * Under the covers, char is written to ORC the same way as string. + */ + private static class CharTreeWriter extends StringTreeWriter { + + CharTreeWriter(int columnId, + ObjectInspector inspector, + StreamFactory writer, + boolean nullable) throws IOException { + super(columnId, inspector, writer, nullable); + } + + /** + * Override base class implementation to support char values. + */ + @Override + String getStringValue(Object obj) { + return (((HiveCharObjectInspector) inspector) + .getPrimitiveJavaObject(obj)).getValue(); + } + } + + /** * Under the covers, varchar is written to ORC the same way as string. */ private static class VarcharTreeWriter extends StringTreeWriter { @@ -1564,6 +1588,9 @@ private static TreeWriter createTreeWriter(ObjectInspector inspector, case STRING: return new StringTreeWriter(streamFactory.getNextColumnId(), inspector, streamFactory, nullable); + case CHAR: + return new CharTreeWriter(streamFactory.getNextColumnId(), + inspector, streamFactory, nullable); case VARCHAR: return new VarcharTreeWriter(streamFactory.getNextColumnId(), inspector, streamFactory, nullable); @@ -1632,6 +1659,13 @@ private static void writeTypes(OrcProto.Footer.Builder builder, case STRING: type.setKind(OrcProto.Type.Kind.STRING); break; + case CHAR: + // The char length needs to be written to file and should be available + // from the object inspector + CharTypeInfo charTypeInfo = (CharTypeInfo) ((PrimitiveObjectInspector) treeWriter.inspector).getTypeInfo(); + type.setKind(Type.Kind.CHAR); + type.setMaximumLength(charTypeInfo.getLength()); + break; case VARCHAR: // The varchar length needs to be written to file and should be available // from the object inspector diff --git ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto index 89d3763..d52d0b6 100644 --- ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto +++ ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto @@ -130,6 +130,7 @@ message Type { DECIMAL = 14; DATE = 15; VARCHAR = 16; + CHAR = 17; } required Kind kind = 1; repeated uint32 subtypes = 2 [packed=true]; diff --git ql/src/test/queries/clientpositive/char_serde.q ql/src/test/queries/clientpositive/char_serde.q new file mode 100644 index 0000000..4340b4d --- /dev/null +++ ql/src/test/queries/clientpositive/char_serde.q @@ -0,0 +1,102 @@ +drop table if exists char_serde_regex; +drop table if exists char_serde_lb; +drop table if exists char_serde_ls; +drop table if exists char_serde_c; +drop table if exists char_serde_lbc; +drop table if exists char_serde_orc; + +-- +-- RegexSerDe +-- +create table char_serde_regex ( + key char(15), + value char(20) +) +row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe' +with serdeproperties ( + "input.regex" = "([^]*)([^]*)" +) +stored as textfile; + +load data local inpath '../../data/files/srcbucket0.txt' overwrite into table char_serde_regex; + +select * from char_serde_regex limit 5; +select value, count(*) from char_serde_regex group by value limit 5; + +-- +-- LazyBinary +-- +create table char_serde_lb ( + key char(15), + value char(20) +); +alter table char_serde_lb set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe'; + +insert overwrite table char_serde_lb + select key, value from char_serde_regex; +select * from char_serde_lb limit 5; +select value, count(*) from char_serde_lb group by value limit 5; + +-- +-- LazySimple +-- +create table char_serde_ls ( + key char(15), + value char(20) +); +alter table char_serde_ls set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'; + +insert overwrite table char_serde_ls + select key, value from char_serde_lb; +select * from char_serde_ls limit 5; +select value, count(*) from char_serde_ls group by value limit 5; + +-- +-- Columnar +-- +create table char_serde_c ( + key char(15), + value char(20) +) stored as rcfile; +alter table char_serde_c set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'; + +insert overwrite table char_serde_c + select key, value from char_serde_ls; +select * from char_serde_c limit 5; +select value, count(*) from char_serde_c group by value limit 5; + +-- +-- LazyBinaryColumnar +-- +create table char_serde_lbc ( + key char(15), + value char(20) +) stored as rcfile; +alter table char_serde_lbc set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'; + +insert overwrite table char_serde_lbc + select key, value from char_serde_c; +select * from char_serde_lbc limit 5; +select value, count(*) from char_serde_lbc group by value limit 5; + +-- +-- ORC +-- +create table char_serde_orc ( + key char(15), + value char(20) +) stored as orc; +alter table char_serde_orc set serde 'org.apache.hadoop.hive.ql.io.orc.OrcSerde'; + + +insert overwrite table char_serde_orc + select key, value from char_serde_lbc; +select * from char_serde_orc limit 5; +select value, count(*) from char_serde_orc group by value limit 5; + +drop table if exists char_serde_regex; +drop table if exists char_serde_lb; +drop table if exists char_serde_ls; +drop table if exists char_serde_c; +drop table if exists char_serde_lbc; +drop table if exists char_serde_orc; diff --git ql/src/test/results/clientpositive/char_serde.q.out ql/src/test/results/clientpositive/char_serde.q.out new file mode 100644 index 0000000..76949b2 --- /dev/null +++ ql/src/test/results/clientpositive/char_serde.q.out @@ -0,0 +1,626 @@ +PREHOOK: query: drop table if exists char_serde_regex +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_serde_regex +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists char_serde_lb +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_serde_lb +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists char_serde_ls +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_serde_ls +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists char_serde_c +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_serde_c +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists char_serde_lbc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_serde_lbc +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists char_serde_orc +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists char_serde_orc +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- +-- RegexSerDe +-- +create table char_serde_regex ( + key char(15), + value char(20) +) +row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe' +with serdeproperties ( + "input.regex" = "([^]*)([^]*)" +) +stored as textfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- +-- RegexSerDe +-- +create table char_serde_regex ( + key char(15), + value char(20) +) +row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe' +with serdeproperties ( + "input.regex" = "([^]*)([^]*)" +) +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_serde_regex +PREHOOK: query: load data local inpath '../../data/files/srcbucket0.txt' overwrite into table char_serde_regex +PREHOOK: type: LOAD +PREHOOK: Output: default@char_serde_regex +POSTHOOK: query: load data local inpath '../../data/files/srcbucket0.txt' overwrite into table char_serde_regex +POSTHOOK: type: LOAD +POSTHOOK: Output: default@char_serde_regex +PREHOOK: query: select * from char_serde_regex limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_regex +#### A masked pattern was here #### +POSTHOOK: query: select * from char_serde_regex limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_regex +#### A masked pattern was here #### +474 val_475 +62 val_63 +468 val_469 +272 val_273 +448 val_449 +PREHOOK: query: select value, count(*) from char_serde_regex group by value limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_regex +#### A masked pattern was here #### +POSTHOOK: query: select value, count(*) from char_serde_regex group by value limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_regex +#### A masked pattern was here #### +val_0 3 +val_1 2 +val_10 1 +val_100 2 +val_101 2 +PREHOOK: query: -- +-- LazyBinary +-- +create table char_serde_lb ( + key char(15), + value char(20) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- +-- LazyBinary +-- +create table char_serde_lb ( + key char(15), + value char(20) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_serde_lb +PREHOOK: query: alter table char_serde_lb set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@char_serde_lb +PREHOOK: Output: default@char_serde_lb +POSTHOOK: query: alter table char_serde_lb set serde 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@char_serde_lb +POSTHOOK: Output: default@char_serde_lb +PREHOOK: query: insert overwrite table char_serde_lb + select key, value from char_serde_regex +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_regex +PREHOOK: Output: default@char_serde_lb +POSTHOOK: query: insert overwrite table char_serde_lb + select key, value from char_serde_regex +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_regex +POSTHOOK: Output: default@char_serde_lb +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: select * from char_serde_lb limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_lb +#### A masked pattern was here #### +POSTHOOK: query: select * from char_serde_lb limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_lb +#### A masked pattern was here #### +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +474 val_475 +62 val_63 +468 val_469 +272 val_273 +448 val_449 +PREHOOK: query: select value, count(*) from char_serde_lb group by value limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_lb +#### A masked pattern was here #### +POSTHOOK: query: select value, count(*) from char_serde_lb group by value limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_lb +#### A masked pattern was here #### +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +val_0 3 +val_1 2 +val_10 1 +val_100 2 +val_101 2 +PREHOOK: query: -- +-- LazySimple +-- +create table char_serde_ls ( + key char(15), + value char(20) +) +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- +-- LazySimple +-- +create table char_serde_ls ( + key char(15), + value char(20) +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_serde_ls +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: alter table char_serde_ls set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@char_serde_ls +PREHOOK: Output: default@char_serde_ls +POSTHOOK: query: alter table char_serde_ls set serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@char_serde_ls +POSTHOOK: Output: default@char_serde_ls +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: insert overwrite table char_serde_ls + select key, value from char_serde_lb +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_lb +PREHOOK: Output: default@char_serde_ls +POSTHOOK: query: insert overwrite table char_serde_ls + select key, value from char_serde_lb +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_lb +POSTHOOK: Output: default@char_serde_ls +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: select * from char_serde_ls limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_ls +#### A masked pattern was here #### +POSTHOOK: query: select * from char_serde_ls limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_ls +#### A masked pattern was here #### +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +474 val_475 +62 val_63 +468 val_469 +272 val_273 +448 val_449 +PREHOOK: query: select value, count(*) from char_serde_ls group by value limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_ls +#### A masked pattern was here #### +POSTHOOK: query: select value, count(*) from char_serde_ls group by value limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_ls +#### A masked pattern was here #### +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +val_0 3 +val_1 2 +val_10 1 +val_100 2 +val_101 2 +PREHOOK: query: -- +-- Columnar +-- +create table char_serde_c ( + key char(15), + value char(20) +) stored as rcfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- +-- Columnar +-- +create table char_serde_c ( + key char(15), + value char(20) +) stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_serde_c +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: alter table char_serde_c set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@char_serde_c +PREHOOK: Output: default@char_serde_c +POSTHOOK: query: alter table char_serde_c set serde 'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@char_serde_c +POSTHOOK: Output: default@char_serde_c +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: insert overwrite table char_serde_c + select key, value from char_serde_ls +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_ls +PREHOOK: Output: default@char_serde_c +POSTHOOK: query: insert overwrite table char_serde_c + select key, value from char_serde_ls +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_ls +POSTHOOK: Output: default@char_serde_c +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: select * from char_serde_c limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_c +#### A masked pattern was here #### +POSTHOOK: query: select * from char_serde_c limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_c +#### A masked pattern was here #### +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +474 val_475 +62 val_63 +468 val_469 +272 val_273 +448 val_449 +PREHOOK: query: select value, count(*) from char_serde_c group by value limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_c +#### A masked pattern was here #### +POSTHOOK: query: select value, count(*) from char_serde_c group by value limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_c +#### A masked pattern was here #### +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +val_0 3 +val_1 2 +val_10 1 +val_100 2 +val_101 2 +PREHOOK: query: -- +-- LazyBinaryColumnar +-- +create table char_serde_lbc ( + key char(15), + value char(20) +) stored as rcfile +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- +-- LazyBinaryColumnar +-- +create table char_serde_lbc ( + key char(15), + value char(20) +) stored as rcfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_serde_lbc +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: alter table char_serde_lbc set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@char_serde_lbc +PREHOOK: Output: default@char_serde_lbc +POSTHOOK: query: alter table char_serde_lbc set serde 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@char_serde_lbc +POSTHOOK: Output: default@char_serde_lbc +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: insert overwrite table char_serde_lbc + select key, value from char_serde_c +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_c +PREHOOK: Output: default@char_serde_lbc +POSTHOOK: query: insert overwrite table char_serde_lbc + select key, value from char_serde_c +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_c +POSTHOOK: Output: default@char_serde_lbc +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: select * from char_serde_lbc limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_lbc +#### A masked pattern was here #### +POSTHOOK: query: select * from char_serde_lbc limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_lbc +#### A masked pattern was here #### +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +474 val_475 +62 val_63 +468 val_469 +272 val_273 +448 val_449 +PREHOOK: query: select value, count(*) from char_serde_lbc group by value limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_lbc +#### A masked pattern was here #### +POSTHOOK: query: select value, count(*) from char_serde_lbc group by value limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_lbc +#### A masked pattern was here #### +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +val_0 3 +val_1 2 +val_10 1 +val_100 2 +val_101 2 +PREHOOK: query: -- +-- ORC +-- +create table char_serde_orc ( + key char(15), + value char(20) +) stored as orc +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- +-- ORC +-- +create table char_serde_orc ( + key char(15), + value char(20) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@char_serde_orc +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: alter table char_serde_orc set serde 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' +PREHOOK: type: ALTERTABLE_SERIALIZER +PREHOOK: Input: default@char_serde_orc +PREHOOK: Output: default@char_serde_orc +POSTHOOK: query: alter table char_serde_orc set serde 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' +POSTHOOK: type: ALTERTABLE_SERIALIZER +POSTHOOK: Input: default@char_serde_orc +POSTHOOK: Output: default@char_serde_orc +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: insert overwrite table char_serde_orc + select key, value from char_serde_lbc +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_lbc +PREHOOK: Output: default@char_serde_orc +POSTHOOK: query: insert overwrite table char_serde_orc + select key, value from char_serde_lbc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_lbc +POSTHOOK: Output: default@char_serde_orc +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: select * from char_serde_orc limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_orc +#### A masked pattern was here #### +POSTHOOK: query: select * from char_serde_orc limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +474 val_475 +62 val_63 +468 val_469 +272 val_273 +448 val_449 +PREHOOK: query: select value, count(*) from char_serde_orc group by value limit 5 +PREHOOK: type: QUERY +PREHOOK: Input: default@char_serde_orc +#### A masked pattern was here #### +POSTHOOK: query: select value, count(*) from char_serde_orc group by value limit 5 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@char_serde_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +val_0 3 +val_1 2 +val_10 1 +val_100 2 +val_101 2 +PREHOOK: query: drop table if exists char_serde_regex +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_serde_regex +PREHOOK: Output: default@char_serde_regex +POSTHOOK: query: drop table if exists char_serde_regex +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_serde_regex +POSTHOOK: Output: default@char_serde_regex +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: drop table if exists char_serde_lb +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_serde_lb +PREHOOK: Output: default@char_serde_lb +POSTHOOK: query: drop table if exists char_serde_lb +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_serde_lb +POSTHOOK: Output: default@char_serde_lb +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: drop table if exists char_serde_ls +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_serde_ls +PREHOOK: Output: default@char_serde_ls +POSTHOOK: query: drop table if exists char_serde_ls +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_serde_ls +POSTHOOK: Output: default@char_serde_ls +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: drop table if exists char_serde_c +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_serde_c +PREHOOK: Output: default@char_serde_c +POSTHOOK: query: drop table if exists char_serde_c +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_serde_c +POSTHOOK: Output: default@char_serde_c +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: drop table if exists char_serde_lbc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_serde_lbc +PREHOOK: Output: default@char_serde_lbc +POSTHOOK: query: drop table if exists char_serde_lbc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_serde_lbc +POSTHOOK: Output: default@char_serde_lbc +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +PREHOOK: query: drop table if exists char_serde_orc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@char_serde_orc +PREHOOK: Output: default@char_serde_orc +POSTHOOK: query: drop table if exists char_serde_orc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@char_serde_orc +POSTHOOK: Output: default@char_serde_orc +POSTHOOK: Lineage: char_serde_c.key SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_c.value SIMPLE [(char_serde_ls)char_serde_ls.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.key SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lb.value SIMPLE [(char_serde_regex)char_serde_regex.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.key SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_lbc.value SIMPLE [(char_serde_c)char_serde_c.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.key SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_ls.value SIMPLE [(char_serde_lb)char_serde_lb.FieldSchema(name:value, type:char(20), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.key SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:key, type:char(15), comment:from deserializer), ] +POSTHOOK: Lineage: char_serde_orc.value SIMPLE [(char_serde_lbc)char_serde_lbc.FieldSchema(name:value, type:char(20), comment:from deserializer), ] diff --git serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java index 6726973..4b9a4f9 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/RegexSerDe.java @@ -29,6 +29,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.serde.serdeConstants; @@ -37,6 +38,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -191,53 +193,72 @@ public Object deserialize(Writable blob) throws SerDeException { try { String t = m.group(c+1); TypeInfo typeInfo = columnTypes.get(c); - String typeName = typeInfo.getTypeName(); // Convert the column to the correct type when needed and set in row obj - if (typeName.equals(serdeConstants.STRING_TYPE_NAME)) { + PrimitiveTypeInfo pti = (PrimitiveTypeInfo) typeInfo; + switch (pti.getPrimitiveCategory()) { + case STRING: row.set(c, t); - } else if (typeName.equals(serdeConstants.TINYINT_TYPE_NAME)) { + break; + case BYTE: Byte b; b = Byte.valueOf(t); row.set(c,b); - } else if (typeName.equals(serdeConstants.SMALLINT_TYPE_NAME)) { + break; + case SHORT: Short s; s = Short.valueOf(t); row.set(c,s); - } else if (typeName.equals(serdeConstants.INT_TYPE_NAME)) { + break; + case INT: Integer i; i = Integer.valueOf(t); row.set(c, i); - } else if (typeName.equals(serdeConstants.BIGINT_TYPE_NAME)) { + break; + case LONG: Long l; l = Long.valueOf(t); row.set(c, l); - } else if (typeName.equals(serdeConstants.FLOAT_TYPE_NAME)) { + break; + case FLOAT: Float f; f = Float.valueOf(t); row.set(c,f); - } else if (typeName.equals(serdeConstants.DOUBLE_TYPE_NAME)) { + break; + case DOUBLE: Double d; d = Double.valueOf(t); row.set(c,d); - } else if (typeName.equals(serdeConstants.BOOLEAN_TYPE_NAME)) { - Boolean b; - b = Boolean.valueOf(t); - row.set(c, b); - } else if (typeName.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) { + break; + case BOOLEAN: + Boolean bool; + bool = Boolean.valueOf(t); + row.set(c, bool); + break; + case TIMESTAMP: Timestamp ts; ts = Timestamp.valueOf(t); row.set(c, ts); - } else if (typeName.equals(serdeConstants.DATE_TYPE_NAME)) { - Date d; - d = Date.valueOf(t); - row.set(c, d); - } else if (typeInfo instanceof DecimalTypeInfo) { + break; + case DATE: + Date date; + date = Date.valueOf(t); + row.set(c, date); + break; + case DECIMAL: HiveDecimal bd = HiveDecimal.create(t); row.set(c, bd); - } else if (typeInfo instanceof VarcharTypeInfo) { + break; + case CHAR: + HiveChar hc = new HiveChar(t, ((CharTypeInfo) typeInfo).getLength()); + row.set(c, hc); + break; + case VARCHAR: HiveVarchar hv = new HiveVarchar(t, ((VarcharTypeInfo)typeInfo).getLength()); row.set(c, hv); + break; + default: + throw new SerDeException("Unsupported type " + typeInfo); } } catch (RuntimeException e) { partialMatchedRowsCount++;