diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java index c4268c1..d30d387 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java @@ -27,7 +27,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedSerde; -import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -40,7 +40,7 @@ * A serde class for ORC. * It transparently passes the object to/from the ORC file reader/writer. */ -public class OrcSerde implements SerDe, VectorizedSerde { +public class OrcSerde extends AbstractSerDe implements VectorizedSerde { private static final Log LOG = LogFactory.getLog(OrcSerde.class); diff --git ql/src/test/queries/clientpositive/column_encoding.q ql/src/test/queries/clientpositive/column_encoding.q new file mode 100644 index 0000000..331b654 --- /dev/null +++ ql/src/test/queries/clientpositive/column_encoding.q @@ -0,0 +1,16 @@ +create table encode_test1(id INT, name STRING, phone STRING, address STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ('column.encode.indices'='2,3', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64WriteOnly') +STORED AS TEXTFILE; + +create table encode_test2(id INT, name STRING, phone STRING, address STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ('column.encode.indices'='2,3', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64Rewriter') +STORED AS TEXTFILE; + +from src tablesample (2 rows) +insert into table encode_test1 select key,'navis',concat('010-0000-', key), concat('Seoul.', value) +insert into table encode_test2 select key,'navis',concat('010-0000-', key), concat('Seoul.', value); + +select * from encode_test1; +select * from encode_test2; diff --git ql/src/test/results/clientpositive/column_encoding.q.out ql/src/test/results/clientpositive/column_encoding.q.out new file mode 100644 index 0000000..1faaf8a --- /dev/null +++ ql/src/test/results/clientpositive/column_encoding.q.out @@ -0,0 +1,80 @@ +PREHOOK: query: create table encode_test1(id INT, name STRING, phone STRING, address STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ('column.encode.indices'='2,3', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64WriteOnly') +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table encode_test1(id INT, name STRING, phone STRING, address STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ('column.encode.indices'='2,3', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64WriteOnly') +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@encode_test1 +PREHOOK: query: create table encode_test2(id INT, name STRING, phone STRING, address STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ('column.encode.indices'='2,3', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64Rewriter') +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: 
create table encode_test2(id INT, name STRING, phone STRING, address STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ('column.encode.indices'='2,3', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64Rewriter') +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@encode_test2 +PREHOOK: query: from src tablesample (2 rows) +insert into table encode_test1 select key,'navis',concat('010-0000-', key), concat('Seoul.', value) +insert into table encode_test2 select key,'navis',concat('010-0000-', key), concat('Seoul.', value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@encode_test1 +PREHOOK: Output: default@encode_test2 +POSTHOOK: query: from src tablesample (2 rows) +insert into table encode_test1 select key,'navis',concat('010-0000-', key), concat('Seoul.', value) +insert into table encode_test2 select key,'navis',concat('010-0000-', key), concat('Seoul.', value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@encode_test1 +POSTHOOK: Output: default@encode_test2 +POSTHOOK: Lineage: encode_test1.address EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test1.id EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test1.name SIMPLE [] +POSTHOOK: Lineage: encode_test1.phone EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.address EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.id EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.name SIMPLE [] +POSTHOOK: Lineage: encode_test2.phone EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select * from encode_test1 +PREHOOK: type: QUERY +PREHOOK: Input: default@encode_test1 +#### A masked pattern was here #### +POSTHOOK: query: select * from encode_test1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encode_test1 +#### A masked pattern was here #### +POSTHOOK: Lineage: encode_test1.address EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test1.id EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test1.name SIMPLE [] +POSTHOOK: Lineage: encode_test1.phone EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.address EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.id EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.name SIMPLE [] +POSTHOOK: Lineage: encode_test2.phone EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +238 navis MDEwLTAwMDAtMjM4 U2VvdWwudmFsXzIzOA== +86 navis MDEwLTAwMDAtODY= U2VvdWwudmFsXzg2 +PREHOOK: query: select * from encode_test2 +PREHOOK: type: QUERY +PREHOOK: Input: default@encode_test2 +#### A masked pattern was here #### +POSTHOOK: query: select * from encode_test2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encode_test2 +#### A masked pattern was here #### +POSTHOOK: Lineage: encode_test1.address EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test1.id EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test1.name SIMPLE [] +POSTHOOK: Lineage: encode_test1.phone EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.address EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.id EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.name SIMPLE [] +POSTHOOK: Lineage: encode_test2.phone EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +238 navis 010-0000-238 Seoul.val_238 +86 navis 010-0000-86 Seoul.val_86 diff --git serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java index 515cf25..9fc9a6f 100644 --- serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java @@ -61,6 +61,10 @@ public static final String ESCAPE_CHAR = "escape.delim"; + public static final String COLUMN_ENCODE_INDICES = "column.encode.indices"; + + public static final String COLUMN_ENCODE_CLASSNAME = "column.encode.classname"; + public static final String HEADER_COUNT = "skip.header.line.count"; public static final String FOOTER_COUNT = "skip.footer.line.count"; diff --git serde/src/java/org/apache/hadoop/hive/serde2/Base64Rewriter.java serde/src/java/org/apache/hadoop/hive/serde2/Base64Rewriter.java new file mode 100644 index 0000000..55e55de --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/Base64Rewriter.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.serde2; + +import org.apache.commons.codec.binary.Base64; + +import java.io.IOException; + +public class Base64Rewriter implements FieldRewriter { + + @Override + public void serialize(ByteStream.Input input, ByteStream.Output output) throws IOException { + output.write(Base64.encodeBase64(input.toBytes())); + } + + @Override + public void deserialize(ByteStream.Input input, ByteStream.Output output) throws IOException { + output.write(Base64.decodeBase64(input.toBytes())); + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java index 73d9b29..c4592b9 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java +++ serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java @@ -21,6 +21,8 @@ import org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream; import org.apache.hadoop.hive.common.io.NonSyncByteArrayOutputStream; +import java.util.Arrays; + /** * Extensions to bytearrayinput/output streams. * @@ -45,6 +47,10 @@ public void reset(byte[] argBuf, int argCount) { count = argCount; } + public byte[] toBytes() { + return Arrays.copyOfRange(buf, pos, count); + } + public Input() { super(new byte[1]); } @@ -79,5 +85,10 @@ public Output() { public Output(int size) { super(size); } + + public void writeTo(int pos, Output output) { + count = pos; + write(output.buf, 0, output.count); + } } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/DelimitedJSONSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/DelimitedJSONSerDe.java index 179f9b5..becae0a 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/DelimitedJSONSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/DelimitedJSONSerDe.java @@ -18,10 +18,9 @@ package org.apache.hadoop.hive.serde2; -import java.io.IOException; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -29,6 +28,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.Writable; +import java.util.Properties; + /** * DelimitedJSONSerDe. * @@ -42,6 +43,14 @@ public DelimitedJSONSerDe() throws SerDeException { } + private String nullStr; + + @Override + public void initialize(Configuration job, Properties tbl) + throws SerDeException { + super.initialize(job, tbl); + nullStr = serdeParams.getNullSequence().toString(); + } /** * Not implemented. 
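Base64Rewriter above operates on whole field buffers: serialize() Base64-encodes whatever bytes the input stream exposes, and deserialize() reverses it. As a minimal sketch (not part of the patch; the wrapper class name is invented, and the sample value is taken from the encode_test1 output above), the round trip can be exercised directly against the ByteStream API from this change:

    import org.apache.hadoop.hive.serde2.Base64Rewriter;
    import org.apache.hadoop.hive.serde2.ByteStream;

    public class RewriterRoundTrip {
      public static void main(String[] args) throws Exception {
        Base64Rewriter rewriter = new Base64Rewriter();
        ByteStream.Input in = new ByteStream.Input();
        ByteStream.Output out = new ByteStream.Output();

        byte[] raw = "Seoul.val_238".getBytes("UTF-8");
        in.reset(raw, raw.length);          // expose the raw field bytes
        rewriter.serialize(in, out);        // out now holds "U2VvdWwudmFsXzIzOA=="
                                            // (the value seen in encode_test1 above)
        ByteStream.Input enc = new ByteStream.Input();
        enc.reset(out.getData(), out.getCount());
        ByteStream.Output dec = new ByteStream.Output();
        rewriter.deserialize(enc, dec);     // dec holds the original raw bytes
      }
    }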
*/ @@ -53,22 +62,13 @@ public Object deserialize(Writable field) throws SerDeException { @Override protected void serializeField(ByteStream.Output out, Object obj, ObjectInspector objInspector, - SerDeParameters serdeParams) throws SerDeException { - if (!objInspector.getCategory().equals(Category.PRIMITIVE) || (objInspector.getTypeName().equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME))) { + SerDeParameters serdeParams, int index) throws SerDeException { + if (!objInspector.getCategory().equals(Category.PRIMITIVE) || + (objInspector.getTypeName().equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME))) { //do this for all complex types and binary - try { - serialize(out, SerDeUtils.getJSONString(obj, objInspector, serdeParams.getNullSequence().toString()), - PrimitiveObjectInspectorFactory.javaStringObjectInspector, serdeParams.getSeparators(), - 1, serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar(), - serdeParams.getNeedsEscape()); - - } catch (IOException e) { - throw new SerDeException(e); - } - - } else { - //primitives except binary - super.serializeField(out, obj, objInspector, serdeParams); + obj = SerDeUtils.getJSONString(obj, objInspector, nullStr); + objInspector = PrimitiveObjectInspectorFactory.javaStringObjectInspector; } + super.serializeField(out, obj, objInspector, serdeParams, index); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/FieldRewriter.java serde/src/java/org/apache/hadoop/hive/serde2/FieldRewriter.java new file mode 100644 index 0000000..88109bd --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/FieldRewriter.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2; + +import java.io.IOException; + +/** + * Rewrites the binary form of a column and vice versa.
+ * Currently, it is only supported by LazySimpleSerDe and its subclasses. + */ +public interface FieldRewriter { + + void serialize(ByteStream.Input input, ByteStream.Output output) throws IOException; + + void deserialize(ByteStream.Input input, ByteStream.Output output) throws IOException; + +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java index 157600e..939a3a0 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java +++ serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java @@ -22,7 +22,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; import org.apache.hadoop.hive.serde2.lazy.LazyUtils; @@ -61,11 +60,10 @@ public ColumnarStruct(ObjectInspector oi, List notSkippedColumnIDs, Tex } @Override - protected int getLength(ObjectInspector objectInspector, ByteArrayRef cachedByteArrayRef, + protected int getLength(ObjectInspector objectInspector, byte[] bytes, int start, int fieldLen) { if (fieldLen == lengthNullSequence) { - byte[] data = cachedByteArrayRef.getData(); - if (LazyUtils.compare(data, start, fieldLen, + if (LazyUtils.compare(bytes, start, fieldLen, nullSequence.getBytes(), 0, lengthNullSequence) == 0) { return -1; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java index 1fd6853..708325d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java +++ serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java @@ -23,7 +23,6 @@ import java.util.List; import org.apache.hadoop.hive.serde2.SerDeStatsStruct; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -37,7 +36,6 @@ * use an array instead of only one object in case in future hive does not do * the byte copy.
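The q.out above shows the practical difference between the two rewriters the test registers via 'column.encode.classname': encode_test1 (Base64WriteOnly) returns the stored Base64 text on SELECT, while encode_test2 (Base64Rewriter) decodes it back to plain text. Base64WriteOnly's source is not included in this excerpt; consistent with that output, it presumably encodes on the write path and passes bytes through unchanged on the read path. A sketch under that assumption, not the actual class:

    package org.apache.hadoop.hive.serde2;

    import org.apache.commons.codec.binary.Base64;

    import java.io.IOException;

    // Assumed shape of Base64WriteOnly (not in this excerpt): encode on write,
    // pass through on read, so readers see the stored Base64 form, as in the
    // encode_test1 results above.
    public class Base64WriteOnly implements FieldRewriter {

      @Override
      public void serialize(ByteStream.Input input, ByteStream.Output output) throws IOException {
        output.write(Base64.encodeBase64(input.toBytes()));
      }

      @Override
      public void deserialize(ByteStream.Input input, ByteStream.Output output) throws IOException {
        output.write(input.toBytes()); // no decoding on the read path
      }
    }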
*/ - ByteArrayRef cachedByteArrayRef; BytesRefWritable rawBytesField; boolean inited; boolean fieldSkipped; @@ -45,7 +43,6 @@ public FieldInfo(LazyObjectBase lazyObject, boolean fieldSkipped, ObjectInspector oi) { field = lazyObject; - cachedByteArrayRef = new ByteArrayRef(); objectInspector = oi; if (fieldSkipped) { this.fieldSkipped = true; @@ -94,23 +91,24 @@ protected Object uncheckedGetField() { if (fieldSkipped) { return null; } + byte[] data; + try { + data = rawBytesField.getData(); + } catch (IOException e) { + throw new RuntimeException(e); + } if (!inited) { - try { - cachedByteArrayRef.setData(rawBytesField.getData()); - } catch (IOException e) { - throw new RuntimeException(e); - } inited = true; - int byteLength = getLength(objectInspector, cachedByteArrayRef, rawBytesField.getStart(), + int byteLength = getLength(objectInspector, data, rawBytesField.getStart(), rawBytesField.getLength()); if (byteLength == -1) { return null; } - field.init(cachedByteArrayRef, rawBytesField.getStart(), byteLength); + field.init(data, rawBytesField.getStart(), byteLength); return field.getObject(); } else { - if (getLength(objectInspector, cachedByteArrayRef, rawBytesField.getStart(), + if (getLength(objectInspector, data, rawBytesField.getStart(), rawBytesField.getLength()) == -1) { return null; } @@ -174,8 +172,9 @@ public Object getField(int fieldID) { /** * Check if the object is null and return the length of the stream * + * * @param objectInspector - * @param cachedByteArrayRef + * @param bytes * the bytes of the object * @param start * the start offset @@ -185,7 +184,7 @@ public Object getField(int fieldID) { * @return -1 for null, >=0 for length */ protected abstract int getLength(ObjectInspector objectInspector, - ByteArrayRef cachedByteArrayRef, int start, int length); + byte[] bytes, int start, int length); /** * create the lazy object for this field diff --git serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java index 6d9715a..540b548 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java +++ serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java @@ -20,7 +20,6 @@ import java.util.List; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -35,7 +34,7 @@ public LazyBinaryColumnarStruct(ObjectInspector oi, List notSkippedColu } @Override - protected int getLength(ObjectInspector objectInspector, ByteArrayRef cachedByteArrayRef, + protected int getLength(ObjectInspector objectInspector, byte[] bytes, int start, int length) { if (length == 0) { return -1; @@ -45,7 +44,7 @@ protected int getLength(ObjectInspector objectInspector, ByteArrayRef cachedByte PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) objectInspector) .getPrimitiveCategory(); if (primitiveCategory.equals(PrimitiveCategory.STRING) && (length == 1) && - (cachedByteArrayRef.getData()[start] + (bytes[start] == LazyBinaryColumnarSerDe.INVALID_UTF__SINGLE_BYTE[0])) { return 0; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java index ce0cfb3..f137c31 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java +++ 
serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java @@ -75,10 +75,10 @@ protected LazyArray(LazyListObjectInspector oi) { /** * Set the row data for this LazyArray. * - * @see LazyObject#init(ByteArrayRef, int, int) + * @see LazyObjectBase#init(byte[], int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { super.init(bytes, start, length); parsed = false; cachedList = null; @@ -117,8 +117,6 @@ private void parse() { return; } - byte[] bytes = this.bytes.getData(); - arrayLength = 0; int arrayByteEnd = start + length; int elementByteBegin = start; @@ -185,7 +183,7 @@ private Object uncheckedGetElement(int index) { int elementLength = startPosition[index + 1] - startPosition[index] - 1; if (elementLength == nullSequence.getLength() && 0 == LazyUtils - .compare(bytes.getData(), startPosition[index], elementLength, + .compare(bytes, startPosition[index], elementLength, nullSequence.getBytes(), 0, nullSequence.getLength())) { return arrayElements[index] = null; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java index ae12f20..353d36b 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java @@ -18,14 +18,11 @@ package org.apache.hadoop.hive.serde2.lazy; -import java.nio.charset.CharacterCodingException; - import org.apache.commons.codec.binary.Base64; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector; import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; public class LazyBinary extends LazyPrimitive { @@ -45,10 +42,10 @@ public LazyBinary(LazyBinary other){ } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { byte[] recv = new byte[length]; - System.arraycopy(bytes.getData(), start, recv, 0, length); + System.arraycopy(bytes, start, recv, 0, length); boolean arrayByteBase64 = Base64.isArrayByteBase64(recv); if (arrayByteBase64) { LOG.debug("Data not contains valid characters within the Base64 alphabet so " + diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBoolean.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBoolean.java index c741c3a..ba2a25a 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBoolean.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBoolean.java @@ -47,19 +47,19 @@ public LazyBoolean(LazyBoolean copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { - if (length == 4 && Character.toUpperCase(bytes.getData()[start]) == 'T' - && Character.toUpperCase(bytes.getData()[start + 1]) == 'R' - && Character.toUpperCase(bytes.getData()[start + 2]) == 'U' - && Character.toUpperCase(bytes.getData()[start + 3]) == 'E') { + public void init(byte[] bytes, int start, int length) { + if (length == 4 && Character.toUpperCase(bytes[start]) == 'T' + && Character.toUpperCase(bytes[start + 1]) == 'R' + && Character.toUpperCase(bytes[start + 2]) == 'U' + && Character.toUpperCase(bytes[start + 3]) == 'E') { data.set(true); isNull = false; } else if (length == 5 - && Character.toUpperCase(bytes.getData()[start]) == 'F' - && Character.toUpperCase(bytes.getData()[start + 1]) == 'A' - && 
Character.toUpperCase(bytes.getData()[start + 2]) == 'L' - && Character.toUpperCase(bytes.getData()[start + 3]) == 'S' - && Character.toUpperCase(bytes.getData()[start + 4]) == 'E') { + && Character.toUpperCase(bytes[start]) == 'F' + && Character.toUpperCase(bytes[start + 1]) == 'A' + && Character.toUpperCase(bytes[start + 2]) == 'L' + && Character.toUpperCase(bytes[start + 3]) == 'S' + && Character.toUpperCase(bytes[start + 4]) == 'E') { data.set(false); isNull = false; } else { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java index a3b8f76..5fefaae 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java @@ -47,9 +47,9 @@ public LazyByte(LazyByte copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { try { - data.set(parseByte(bytes.getData(), start, length, 10)); + data.set(parseByte(bytes, start, length, 10)); isNull = false; } catch (NumberFormatException e) { isNull = true; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDate.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDate.java index 7af2374..fc06802 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDate.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDate.java @@ -57,10 +57,10 @@ public LazyDate(LazyDate copy) { * @param length */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { String s = null; try { - s = Text.decode(bytes.getData(), start, length); + s = Text.decode(bytes, start, length); data.set(Date.valueOf(s)); isNull = false; } catch (Exception e) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java index 05ca4e9..5e0928b 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java @@ -44,10 +44,10 @@ public LazyDouble(LazyDouble copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { String byteData = null; try { - byteData = Text.decode(bytes.getData(), start, length); + byteData = Text.decode(bytes, start, length); data.set(Double.parseDouble(byteData)); isNull = false; } catch (NumberFormatException e) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFloat.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFloat.java index 37676d1..fc644c7 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFloat.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFloat.java @@ -44,10 +44,10 @@ public LazyFloat(LazyFloat copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { String byteData = null; try { - byteData = Text.decode(bytes.getData(), start, length); + byteData = Text.decode(bytes, start, length); data.set(Float.parseFloat(byteData)); isNull = false; } catch (NumberFormatException e) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java index ef469eb..b21a545 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java 
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java @@ -54,10 +54,10 @@ public void setValue(LazyHiveChar copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { String byteData = null; try { - byteData = Text.decode(bytes.getData(), start, length); + byteData = Text.decode(bytes, start, length); data.set(byteData, maxLength); isNull = false; } catch (CharacterCodingException e) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveDecimal.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveDecimal.java index 78cc381..83d13d0 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveDecimal.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveDecimal.java @@ -62,10 +62,10 @@ public LazyHiveDecimal(LazyHiveDecimal copy) { * @param length */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { String byteData = null; try { - byteData = Text.decode(bytes.getData(), start, length); + byteData = Text.decode(bytes, start, length); } catch (CharacterCodingException e) { isNull = true; LOG.debug("Data not in the HiveDecimal data type range so converted to null.", e); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java index bc8d41e..2a3a0ec 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java @@ -54,10 +54,10 @@ public void setValue(LazyHiveVarchar copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { String byteData = null; try { - byteData = Text.decode(bytes.getData(), start, length); + byteData = Text.decode(bytes, start, length); data.set(byteData, maxLength); isNull = false; } catch (CharacterCodingException e) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java index ad82ebf..bfb79ad 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java @@ -50,9 +50,9 @@ public LazyInteger(LazyInteger copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { try { - data.set(parseInt(bytes.getData(), start, length, 10)); + data.set(parseInt(bytes, start, length, 10)); isNull = false; } catch (NumberFormatException e) { isNull = true; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java index a9779a0..3cc2163 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java @@ -50,9 +50,9 @@ public LazyLong(LazyLong copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { try { - data.set(parseLong(bytes.getData(), start, length, 10)); + data.set(parseLong(bytes, start, length, 10)); isNull = false; } catch (NumberFormatException e) { isNull = true; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java 
serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java index 84c2a5e..678f911 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java @@ -85,10 +85,10 @@ protected LazyMap(LazyMapObjectInspector oi) { /** * Set the row data for this LazyArray. * - * @see LazyObject#init(ByteArrayRef, int, int) + * @see LazyObjectBase#init(byte[], int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { super.init(bytes, start, length); parsed = false; cachedMap = null; @@ -139,7 +139,6 @@ private void parse() { int elementByteBegin = start; int keyValueSeparatorPosition = -1; int elementByteEnd = start; - byte[] bytes = this.bytes.getData(); // Go through all bytes in the byte[] while (elementByteEnd <= arrayByteEnd) { @@ -243,7 +242,7 @@ private LazyObject uncheckedGetValue(int index) { int valueILength = keyStart[index + 1] - 1 - valueIBegin; if (valueILength < 0 || ((valueILength == nullSequence.getLength()) && 0 == LazyUtils - .compare(bytes.getData(), valueIBegin, valueILength, nullSequence + .compare(bytes, valueIBegin, valueILength, nullSequence .getBytes(), 0, nullSequence.getLength()))) { return valueObjects[index] = null; } @@ -270,7 +269,7 @@ private LazyObject uncheckedGetValue(int index) { int keyILength = keyEnd[index] - keyStart[index]; if (keyILength < 0 || ((keyILength == nullSequence.getLength()) && 0 == LazyUtils.compare( - bytes.getData(), keyIBegin, keyILength, nullSequence.getBytes(), 0, + bytes, keyIBegin, keyILength, nullSequence.getBytes(), 0, nullSequence.getLength()))) { return keyObjects[index] = null; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java index e4cffc9..2c18688 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java @@ -25,7 +25,7 @@ public abstract class LazyNonPrimitive extends LazyObject { - protected ByteArrayRef bytes; + protected byte[] bytes; protected int start; protected int length; @@ -48,7 +48,7 @@ protected LazyNonPrimitive(OI oi) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { if (bytes == null) { throw new RuntimeException("bytes cannot be null!"); } @@ -56,7 +56,7 @@ public void init(ByteArrayRef bytes, int start, int length) { this.start = start; this.length = length; assert start >= 0; - assert start + length <= bytes.getData().length; + assert start + length <= bytes.length; } @Override @@ -66,6 +66,6 @@ public Object getObject() { @Override public int hashCode() { - return LazyUtils.hashBytes(bytes.getData(), start, length); + return LazyUtils.hashBytes(bytes, start, length); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java index 3334dff..5e67975 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java @@ -25,15 +25,15 @@ * that we will be able to drop the reference to byte[] by a single * assignment. The ByteArrayRef object can be reused across multiple rows. * + * * @param bytes * The wrapper of the byte[]. * @param start * The start position inside the bytes. 
* @param length * The length of the data, starting from "start" - * @see ByteArrayRef */ - public abstract void init(ByteArrayRef bytes, int start, int length); + public abstract void init(byte[] bytes, int start, int length); /** * If the LazyObjectBase is a primitive Object, then deserialize it and return the diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java index 222b9bc..bdbc2c7 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java @@ -67,10 +67,10 @@ public int hashCode() { return isNull ? 0 : data.hashCode(); } - public void logExceptionMessage(ByteArrayRef bytes, int start, int length, String dataType) { + public void logExceptionMessage(byte[] bytes, int start, int length, String dataType) { try { if(LOG.isDebugEnabled()) { - String byteData = Text.decode(bytes.getData(), start, length); + String byteData = Text.decode(bytes, start, length); LOG.debug("Data not in the " + dataType + " data type range so converted to null. Given data is :" + byteData, new Exception("For debugging purposes")); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java index f04e131..6237994 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java @@ -47,9 +47,9 @@ public LazyShort(LazyShort copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { try { - data.set(parseShort(bytes.getData(), start, length)); + data.set(parseShort(bytes, start, length)); isNull = false; } catch (NumberFormatException e) { isNull = true; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java index 606208c..8d9d92e 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.TreeSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -30,6 +31,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.FieldRewriter; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; @@ -49,6 +51,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import org.apache.hadoop.util.ReflectionUtils; /** * LazySimpleSerDe can be used to read the same data format as @@ -128,6 +131,11 @@ public static byte getByte(String altValue, byte defaultVal) { boolean escaped; byte escapeChar; boolean[] needsEscape; + boolean[] needEncoding; + FieldRewriter rewriter; + + transient final ByteStream.Input input = new ByteStream.Input(); + transient final ByteStream.Output output = new ByteStream.Output(); public List getColumnTypes() { return columnTypes; @@ -168,9 +176,13 @@ public byte getEscapeChar() { public boolean[] getNeedsEscape() { return needsEscape; } + + public boolean 
isEncoded(int index) { + return needEncoding != null && index < needEncoding.length && needEncoding[index]; + } } - SerDeParameters serdeParams = null; + protected SerDeParameters serdeParams = null; /** * Initialize the SerDe given the parameters. serialization.format: separator @@ -197,6 +209,8 @@ public void initialize(Configuration job, Properties tbl) cachedLazyStruct = (LazyStruct) LazyFactory .createLazyObject(cachedObjectInspector); + cachedLazyStruct.setSerdeParams(serdeParams); + LOG.debug(getClass().getName() + " initialized with: columnNames=" + serdeParams.columnNames + " columnTypes=" + serdeParams.columnTypes + " separator=" + Arrays.asList(serdeParams.separators) @@ -308,15 +322,37 @@ public static SerDeParameters initSerdeParams(Configuration job, } } + String encodeCols = tbl.getProperty(serdeConstants.COLUMN_ENCODE_INDICES); + if (encodeCols != null) { + TreeSet<Integer> indices = new TreeSet<Integer>(); + for (String index : encodeCols.split(",")) { + indices.add(Integer.parseInt(index.trim())); + } + serdeParams.needEncoding = new boolean[indices.last() + 1]; + for (int index : indices) { + serdeParams.needEncoding[index] = true; + } + } + + String encoderClass = tbl.getProperty(serdeConstants.COLUMN_ENCODE_CLASSNAME); + if (encoderClass != null) { + try { + serdeParams.rewriter = + (FieldRewriter) ReflectionUtils.newInstance(Class.forName(encoderClass), job); + } catch (Exception e) { + throw new SerDeException(e); + } + } + if (serdeParams.needEncoding != null && serdeParams.rewriter == null) { + throw new SerDeException("Encoder is not specified by serde property 'column.encode.classname'"); + } + return serdeParams; } // The object for storing row data LazyStruct cachedLazyStruct; - // The wrapper for byte array - ByteArrayRef byteArrayRef; - /** * Deserialize a row from the Writable to a LazyObject.
* @@ -327,18 +363,13 @@ public static SerDeParameters initSerdeParams(Configuration job, */ @Override public Object deserialize(Writable field) throws SerDeException { - if (byteArrayRef == null) { - byteArrayRef = new ByteArrayRef(); - } if (field instanceof BytesWritable) { BytesWritable b = (BytesWritable) field; // For backward-compatibility with hadoop 0.17 - byteArrayRef.setData(b.getBytes()); - cachedLazyStruct.init(byteArrayRef, 0, b.getLength()); + cachedLazyStruct.init(b.getBytes(), 0, b.getLength()); } else if (field instanceof Text) { Text t = (Text) field; - byteArrayRef.setData(t.getBytes()); - cachedLazyStruct.init(byteArrayRef, 0, t.getLength()); + cachedLazyStruct.init(t.getBytes(), 0, t.getLength()); } else { throw new SerDeException(getClass().toString() + ": expects either BytesWritable or Text object!"); @@ -420,7 +451,7 @@ public Writable serialize(Object obj, ObjectInspector objInspector) + TypeInfoUtils.getTypeInfoFromObjectInspector(objInspector)); } - serializeField(serializeStream, f, foi, serdeParams); + serializeField(serializeStream, f, foi, serdeParams, i); } // TODO: The copy of data is unnecessary, but there is no work-around @@ -434,10 +465,17 @@ public Writable serialize(Object obj, ObjectInspector objInspector) } protected void serializeField(ByteStream.Output out, Object obj, ObjectInspector objInspector, - SerDeParameters serdeParams) throws SerDeException { + SerDeParameters serdeParams, int index) throws SerDeException { try { + int pos = out.getCount(); serialize(out, obj, objInspector, serdeParams.separators, 1, serdeParams.nullSequence, serdeParams.escaped, serdeParams.escapeChar, serdeParams.needsEscape); + if (serdeParams.isEncoded(index)) { + serdeParams.input.reset(out.getData(), pos, out.getCount() - pos); + serdeParams.rewriter.serialize(serdeParams.input, serdeParams.output); + out.writeTo(pos, serdeParams.output); + serdeParams.output.reset(); + } } catch (IOException e) { throw new SerDeException(e); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java index 28b3f86..067f095 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java @@ -36,15 +36,14 @@ public LazyString(LazyString copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { if (oi.isEscaped()) { byte escapeChar = oi.getEscapeChar(); - byte[] inputBytes = bytes.getData(); // First calculate the length of the output string int outputLength = 0; for (int i = 0; i < length; i++) { - if (inputBytes[start + i] != escapeChar) { + if (bytes[start + i] != escapeChar) { outputLength++; } else { outputLength++; @@ -54,7 +53,7 @@ public void init(ByteArrayRef bytes, int start, int length) { // Copy the data over, so that the internal state of Text will be set to // the required outputLength. 
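The serializeField() change above rewrites a field in place: the field is serialized into the row buffer as usual, and if its index is flagged for encoding, the just-written byte range is run through the rewriter and spliced back, with the new ByteStream.Output.writeTo(pos, output) truncating the buffer to the field's start before appending the encoded bytes. Restated as a commented sketch (local names mirror the patch; 'out' is the row buffer):

    int pos = out.getCount();               // offset where this field begins
    // ... the field is serialized into 'out' here ...
    if (serdeParams.isEncoded(index)) {
      // view exactly the bytes just written for this field
      serdeParams.input.reset(out.getData(), pos, out.getCount() - pos);
      // encode them into the shared scratch buffer
      serdeParams.rewriter.serialize(serdeParams.input, serdeParams.output);
      // truncate 'out' back to 'pos' and append the encoded bytes
      out.writeTo(pos, serdeParams.output);
      serdeParams.output.reset();           // scratch buffer is reused per field
    }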
- data.set(bytes.getData(), start, outputLength); + data.set(bytes, start, outputLength); // We need to copy the data byte by byte only in case the // "outputLength < length" (which means there is at least one escaped @@ -63,20 +62,20 @@ int k = 0; byte[] outputBytes = data.getBytes(); for (int i = 0; i < length; i++) { - byte b = inputBytes[start + i]; + byte b = bytes[start + i]; if (b != escapeChar || i == length - 1) { outputBytes[k++] = b; } else { // get the next byte i++; - outputBytes[k++] = inputBytes[start + i]; + outputBytes[k++] = bytes[start + i]; } } assert (k == outputLength); } } else { // if the data is not escaped, simply copy the data. - data.set(bytes.getData(), start, length); + data.set(bytes, start, length); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java index 8a1ea46..36899d3 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.serde2.lazy; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -26,7 +27,6 @@ import org.apache.hadoop.hive.serde2.SerDeStatsStruct; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.Text; /** @@ -68,6 +68,9 @@ */ boolean[] fieldInited; + // non-null only for top level object (i.e. row) + LazySimpleSerDe.SerDeParameters serdeParams; + /** * Construct a LazyStruct object with the ObjectInspector. */ @@ -78,10 +81,10 @@ public LazyStruct(LazySimpleStructObjectInspector oi) { /** * Set the row data for this LazyStruct.
* - * @see LazyObject#init(ByteArrayRef, int, int) + * @see LazyObjectBase#init(byte[], int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { super.init(bytes, start, length); parsed = false; serializedSize = length; @@ -101,8 +104,7 @@ private void parse() { byte escapeChar = oi.getEscapeChar(); if (fields == null) { - List<? extends StructField> fieldRefs = ((StructObjectInspector) oi) - .getAllStructFieldRefs(); + List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs(); fields = new LazyObject[fieldRefs.size()]; for (int i = 0; i < fields.length; i++) { fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i) @@ -118,7 +120,6 @@ private void parse() { int fieldId = 0; int fieldByteBegin = start; int fieldByteEnd = start; - byte[] bytes = this.bytes.getData(); // Go through all bytes in the byte[] while (fieldByteEnd <= structByteEnd) { @@ -210,13 +211,26 @@ private Object uncheckedGetField(int fieldID) { int fieldLength = startPosition[fieldID + 1] - startPosition[fieldID] - 1; if ((fieldLength < 0) || (fieldLength == nullSequence.getLength() && LazyUtils.compare(bytes - .getData(), fieldByteBegin, fieldLength, nullSequence.getBytes(), + , fieldByteBegin, fieldLength, nullSequence.getBytes(), 0, nullSequence.getLength()) == 0)) { return null; } + byte[] binary = bytes; if (!fieldInited[fieldID]) { fieldInited[fieldID] = true; - fields[fieldID].init(bytes, fieldByteBegin, fieldLength); + if (serdeParams != null && serdeParams.isEncoded(fieldID)) { + serdeParams.input.reset(bytes, fieldByteBegin, fieldLength); + serdeParams.output.reset(); + try { + serdeParams.rewriter.deserialize(serdeParams.input, serdeParams.output); + } catch (IOException e) { + throw new RuntimeException(e); + } + binary = serdeParams.output.getData(); + fieldLength = serdeParams.output.getCount(); + fieldByteBegin = 0; + } + fields[fieldID].init(binary, fieldByteBegin, fieldLength); } return fields[fieldID].getObject(); } @@ -275,4 +289,8 @@ protected void setFieldInited(boolean[] fieldInited) { public long getRawDataSerializedSize() { return serializedSize; } + + public void setSerdeParams(LazySimpleSerDe.SerDeParameters serdeParams) { + this.serdeParams = serdeParams; + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java index 27895c5..102c020 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java @@ -57,10 +57,10 @@ public LazyTimestamp(LazyTimestamp copy) { * @param length */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { String s = null; try { - s = new String(bytes.getData(), start, length, "US-ASCII"); + s = new String(bytes, start, length, "US-ASCII"); } catch (UnsupportedEncodingException e) { LOG.error(e); s = ""; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java index 9f6bc3f..a50859f 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java @@ -63,10 +63,10 @@ public LazyUnion(LazyUnionObjectInspector oi) { /** * Set the row data for this LazyUnion.
* - * @see LazyObject#init(ByteArrayRef, int, int) + * @see LazyObjectBase#init(byte[], int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { super.init(bytes, start, length); parsed = false; } @@ -87,7 +87,6 @@ private void parse() { int unionByteEnd = start + length; int fieldByteEnd = start; - byte[] bytes = this.bytes.getData(); // Go through all bytes in the byte[] while (fieldByteEnd < unionByteEnd) { if (bytes[fieldByteEnd] != separator) { @@ -127,7 +126,7 @@ private Object uncheckedGetField() { Text nullSequence = oi.getNullSequence(); int fieldLength = start + length - startPosition; if (fieldLength != 0 && fieldLength == nullSequence.getLength() && - LazyUtils.compare(bytes.getData(), startPosition, fieldLength, + LazyUtils.compare(bytes, startPosition, fieldLength, nullSequence.getBytes(), 0, nullSequence.getLength()) == 0) { return null; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyVoid.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyVoid.java index 13d1b11..cd7bd12 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyVoid.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyVoid.java @@ -31,6 +31,6 @@ } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java index 55f96ee..af6c8fe 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java @@ -21,8 +21,6 @@ import java.util.Arrays; import java.util.List; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; -import org.apache.hadoop.hive.serde2.lazy.LazyObject; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector; @@ -91,10 +89,10 @@ protected LazyBinaryArray(LazyBinaryListObjectInspector oi) { /** * Set the row data for this LazyBinaryArray. 
    *
-   * @see LazyObject#init(ByteArrayRef, int, int)
+   * @see org.apache.hadoop.hive.serde2.lazy.LazyObjectBase#init(byte[], int, int)
    */
   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {
     super.init(bytes, start, length);
     parsed = false;
   }
@@ -122,8 +120,6 @@ private void adjustArraySize(int newSize) {
    */
   private void parse() {
-    byte[] bytes = this.bytes.getData();
-
     // get the vlong that represents the map size
     LazyBinaryUtils.readVInt(bytes, start, vInt);
     arraySize = vInt.value;
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBinary.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBinary.java
index d398285..6246d84 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBinary.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBinary.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hive.serde2.lazybinary;

-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector;
 import org.apache.hadoop.io.BytesWritable;

@@ -38,8 +37,8 @@ public LazyBinaryBinary(WritableBinaryObjectInspector baoi) {
   }

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {
     assert (length > -1);
-    data.set(bytes.getData(), start, length);
+    data.set(bytes, start, length);
   }
 }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java
index 96969fb..545d304 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hive.serde2.lazybinary;

-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
 import org.apache.hadoop.io.BooleanWritable;

@@ -48,9 +47,9 @@ public LazyBinaryBoolean(LazyBinaryBoolean copy) {
   }

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {
     assert (1 == length);
-    byte val = bytes.getData()[start];
+    byte val = bytes[start];
     if (val == 0) {
       data.set(false);
     } else if (val == 1) {
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java
index d430c96..b6b377f 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hive.serde2.lazybinary;

 import org.apache.hadoop.hive.serde2.io.ByteWritable;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;

 /**
@@ -38,8 +37,8 @@
   }

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {
     assert (1 == length);
-    data.set(bytes.getData()[start]);
+    data.set(bytes[start]);
   }
 }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDate.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDate.java
index d0c2504..889beb2 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDate.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDate.java
@@ -20,7 +20,6 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector;

@@ -54,7 +53,7 @@
    * @param length
    */
   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
-    data.setFromBytes(bytes.getData(), start, length, vInt);
+  public void init(byte[] bytes, int start, int length) {
+    data.setFromBytes(bytes, start, length, vInt);
   }
 }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java
index c2c4f9e..b9889a5 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hive.serde2.lazybinary;

 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;

 /**
@@ -38,9 +37,8 @@
   }

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {
     assert (8 == length);
-    data.set(Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes
-        .getData(), start)));
+    data.set(Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes, start)));
   }
 }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java
index 11e13dc..526e8ea 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hive.serde2.lazybinary;

-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
 import org.apache.hadoop.io.FloatWritable;

@@ -38,9 +37,8 @@
   }

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {
     assert (4 == length);
-    data.set(Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes
-        .getData(), start)));
+    data.set(Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes, start)));
   }
 }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveChar.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveChar.java
index b64a3b4..52b71c4 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveChar.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveChar.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hive.serde2.lazybinary;

 import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.io.Text;

@@ -41,10 +40,10 @@
   }

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {
     // re-use existing text member in char writable
     Text textValue = data.getTextValue();
-    textValue.set(bytes.getData(), start, length);
+    textValue.set(bytes, start, length);
     data.enforceMaxLength(maxLength);
   }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveDecimal.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveDecimal.java
index e56e2ca..7525f80 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveDecimal.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveDecimal.java
@@ -19,7 +19,6 @@

 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;

@@ -43,8 +42,8 @@
   }

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
-    data.setFromBytes(bytes.getData(), start, length);
+  public void init(byte[] bytes, int start, int length) {
+    data.setFromBytes(bytes, start, length);
     HiveDecimal dec = data.getHiveDecimal(precision, scale);
     data = dec == null ? null : new HiveDecimalWritable(dec);
   }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveVarchar.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveVarchar.java
index 26df6f3..f96e49e 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveVarchar.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveVarchar.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hive.serde2.lazybinary;

 import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.io.Text;

@@ -41,10 +40,10 @@
   }

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {
     // re-use existing text member in varchar writable
     Text textValue = data.getTextValue();
-    textValue.set(bytes.getData(), start, length);
+    textValue.set(bytes, start, length);
     data.enforceMaxLength(maxLength);
   }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java
index f2a6943..7452103 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hive.serde2.lazybinary;

-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
 import org.apache.hadoop.io.IntWritable;

@@ -46,8 +45,8 @@ VInt vInt = new LazyBinaryUtils.VInt();

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
-    LazyBinaryUtils.readVInt(bytes.getData(), start, vInt);
+  public void init(byte[] bytes, int start, int length) {
+    LazyBinaryUtils.readVInt(bytes, start, vInt);
     assert (length == vInt.length);
     data.set(vInt.value);
   }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java
index ea0eba2..073e6d4 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hive.serde2.lazybinary;

-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VLong;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
 import org.apache.hadoop.io.LongWritable;

@@ -46,8 +45,8 @@ VLong vLong = new LazyBinaryUtils.VLong();

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
-    LazyBinaryUtils.readVLong(bytes.getData(), start, vLong);
+  public void init(byte[] bytes, int start, int length) {
+    LazyBinaryUtils.readVLong(bytes, start, vLong);
     assert (length == vLong.length);
     data.set(vLong.value);
   }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java
index 31ad78e..a866197 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java
@@ -23,7 +23,6 @@

 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
 import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector;

@@ -100,10 +99,10 @@ protected LazyBinaryMap(LazyBinaryMapObjectInspector oi) {
   /**
    * Set the row data for this LazyBinaryMap.
    *
-   * @see LazyBinaryObject#init(ByteArrayRef, int, int)
+   * @see org.apache.hadoop.hive.serde2.lazy.LazyObjectBase#init(byte[], int, int)
    */
   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {
     super.init(bytes, start, length);
     parsed = false;
   }
@@ -137,8 +136,6 @@ protected void adjustArraySize(int newSize) {
    */
   private void parse() {
-    byte[] bytes = this.bytes.getData();
-
     // get the VInt that represents the map size
     LazyBinaryUtils.readVInt(bytes, start, vInt);
     mapSize = vInt.value;
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java
index 1d0783f..aa50c86 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hive.serde2.lazybinary;

-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

@@ -29,7 +28,7 @@ public abstract class LazyBinaryNonPrimitive extends LazyBinaryObject {

-  protected ByteArrayRef bytes;
+  protected byte[] bytes;
   protected int start;
   protected int length;

@@ -46,7 +45,7 @@ public Object getObject() {
   }

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {
     if (null == bytes) {
       throw new RuntimeException("bytes cannot be null!");
     }
@@ -60,6 +59,6 @@ public void init(ByteArrayRef bytes, int start, int length) {

   @Override
   public int hashCode() {
-    return LazyUtils.hashBytes(bytes.getData(), start, length);
+    return LazyUtils.hashBytes(bytes, start, length);
   }
 }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
index 0324453..173e2b8 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
@@ -36,7 +36,6 @@
 import org.apache.hadoop.hive.serde2.io.DateWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

@@ -148,32 +147,24 @@ public ObjectInspector getObjectInspector() throws SerDeException {
     return BytesWritable.class;
   }

-  // The wrapper for byte array
-  ByteArrayRef byteArrayRef;
-
   /**
    * Deserialize a table record to a lazybinary struct.
    */
   @Override
   public Object deserialize(Writable field) throws SerDeException {
-    if (byteArrayRef == null) {
-      byteArrayRef = new ByteArrayRef();
-    }
     if (field instanceof BytesWritable) {
       BytesWritable b = (BytesWritable) field;
       if (b.getLength() == 0) {
         return null;
       }
       // For backward-compatibility with hadoop 0.17
-      byteArrayRef.setData(b.getBytes());
-      cachedLazyBinaryStruct.init(byteArrayRef, 0, b.getLength());
+      cachedLazyBinaryStruct.init(b.getBytes(), 0, b.getLength());
     } else if (field instanceof Text) {
       Text t = (Text) field;
       if (t.getLength() == 0) {
         return null;
       }
-      byteArrayRef.setData(t.getBytes());
-      cachedLazyBinaryStruct.init(byteArrayRef, 0, t.getLength());
+      cachedLazyBinaryStruct.init(t.getBytes(), 0, t.getLength());
     } else {
       throw new SerDeException(getClass().toString()
           + ": expects either BytesWritable or Text object!");
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java
index 1bc01ce..b5d58a6 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hive.serde2.lazybinary;

 import org.apache.hadoop.hive.serde2.io.ShortWritable;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;

 /**
@@ -38,8 +37,8 @@
   }

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {
     assert (2 == length);
-    data.set(LazyBinaryUtils.byteArrayToShort(bytes.getData(), start));
+    data.set(LazyBinaryUtils.byteArrayToShort(bytes, start));
   }
 }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java
index 9f691d7..4d2292a 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hive.serde2.lazybinary;

-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
 import org.apache.hadoop.io.Text;

@@ -43,8 +42,8 @@ public LazyBinaryString(LazyBinaryString copy) {
   }

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {
     assert (length > -1);
-    data.set(bytes.getData(), start, length);
+    data.set(bytes, start, length);
   }
 }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java
index caf3517..36f1e90 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java
@@ -24,7 +24,6 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.serde2.SerDeStatsStruct;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
 import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;

@@ -88,7 +87,7 @@ protected LazyBinaryStruct(LazyBinaryStructObjectInspector oi) {
   }

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {
     super.init(bytes, start, length);
     parsed = false;
     serializedSize = length;
@@ -126,7 +125,6 @@ private void parse() {
     int fieldId = 0;
     int structByteEnd = start + length;

-    byte[] bytes = this.bytes.getData();
     byte nullByte = bytes[start];
     int lastFieldByteEnd = start + 1;
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryTimestamp.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryTimestamp.java
index 98dd81c..0eeb842 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryTimestamp.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryTimestamp.java
@@ -20,7 +20,6 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;

 /**
@@ -47,11 +46,10 @@
    * @param bytes
    * @param start
    * @param length
-   *          If length is 4, no decimal bytes follow, otherwise read following bytes
-   *          as VInt and reverse its value
+   *          If length is 4, no decimal bytes follow, otherwise read the following bytes
    */
   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
-    data.set(bytes.getData(), start);
+  public void init(byte[] bytes, int start, int length) {
+    data.set(bytes, start);
   }
 }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryVoid.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryVoid.java
index 4b7153d..4103db6 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryVoid.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryVoid.java
@@ -17,7 +17,6 @@
  */
 package org.apache.hadoop.hive.serde2.lazybinary;

-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableVoidObjectInspector;
 import org.apache.hadoop.io.Writable;

@@ -36,6 +35,6 @@
   }

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {
   }
 }
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioBoolean.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioBoolean.java
index 64a237e..b3cde9d 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioBoolean.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioBoolean.java
@@ -22,13 +22,12 @@
 import java.io.IOException;

 import org.apache.hadoop.hive.serde2.ByteStream;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyBoolean;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBooleanObjectInspector;

 /**
  * LazyBooleanBinary for storing a boolean value as an BooleanWritable. This class complements class
- * LazyBoolean. It's primary difference is the {@link #init(ByteArrayRef, int, int)} method, which
+ * LazyBoolean. Its primary difference is the {@link #init(byte[], int, int)} method, which
  * reads the boolean value stored from the default binary format.
  */
 public class LazyDioBoolean extends LazyBoolean {
@@ -52,12 +51,12 @@ public LazyDioBoolean(LazyDioBoolean copy) {
    * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int)
    */
   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {

     boolean value = false;

     try {
-      in = new ByteStream.Input(bytes.getData(), start, length);
+      in = new ByteStream.Input(bytes, start, length);
       din = new DataInputStream(in);
       value = din.readBoolean();
       data.set(value);
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioByte.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioByte.java
index 8cdbfd8..c9e0dc5 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioByte.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioByte.java
@@ -22,13 +22,12 @@
 import java.io.IOException;

 import org.apache.hadoop.hive.serde2.ByteStream;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyByte;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyByteObjectInspector;

 /**
  * LazyByteBinary for storing a byte value as a ByteWritable. This class complements class
- * LazyByte. It's primary difference is the {@link #init(ByteArrayRef, int, int)} method, which
+ * LazyByte. Its primary difference is the {@link #init(byte[], int, int)} method, which
  * reads the raw byte value stored.
  */
 public class LazyDioByte extends LazyByte {
@@ -45,12 +44,12 @@ public LazyDioByte(LazyDioByte copy) {
   }

   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {

     byte value = 0;

     try {
-      in = new ByteStream.Input(bytes.getData(), start, length);
+      in = new ByteStream.Input(bytes, start, length);
       din = new DataInputStream(in);
       value = din.readByte();
       data.set(value);
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioDouble.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioDouble.java
index c86705d..c92f7db 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioDouble.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioDouble.java
@@ -22,13 +22,12 @@
 import java.io.IOException;

 import org.apache.hadoop.hive.serde2.ByteStream;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyDouble;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector;

 /**
  * LazyDoubleBinary for storing a double value as a DoubleWritable. This class complements class
- * LazyDouble. It's primary difference is the {@link #init(ByteArrayRef, int, int)} method, which
+ * LazyDouble. Its primary difference is the {@link #init(byte[], int, int)} method, which
  * reads the double value stored from the default binary format.
  */
 public class LazyDioDouble extends LazyDouble {
@@ -52,12 +51,12 @@ public LazyDioDouble(LazyDoubleObjectInspector oi) {
    * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int)
    */
   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {

     double value = 0.0;

     try {
-      in = new ByteStream.Input(bytes.getData(), start, length);
+      in = new ByteStream.Input(bytes, start, length);
       din = new DataInputStream(in);
       value = din.readDouble();
       data.set(value);
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioFloat.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioFloat.java
index fc3c1b2..fa393d9 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioFloat.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioFloat.java
@@ -22,13 +22,12 @@
 import java.io.IOException;

 import org.apache.hadoop.hive.serde2.ByteStream;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyFloat;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyFloatObjectInspector;

 /**
  * LazyFloatBinary for storing a float value as a FloatWritable. This class complements class
- * LazyFloat. It's primary difference is the {@link #init(ByteArrayRef, int, int)} method, which
+ * LazyFloat. Its primary difference is the {@link #init(byte[], int, int)} method, which
  * reads the float value stored from the default binary format.
  */
 public class LazyDioFloat extends LazyFloat {
@@ -52,12 +51,12 @@ public LazyDioFloat(LazyDioFloat copy) {
    * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int)
    */
   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {

     float value = 0.0F;

     try {
-      in = new ByteStream.Input(bytes.getData(), start, length);
+      in = new ByteStream.Input(bytes, start, length);
       din = new DataInputStream(in);
       value = din.readFloat();
       data.set(value);
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioInteger.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioInteger.java
index e7eca65..513ba66 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioInteger.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioInteger.java
@@ -22,13 +22,12 @@
 import java.io.IOException;

 import org.apache.hadoop.hive.serde2.ByteStream;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyInteger;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector;

 /**
  * LazyIntegerBinary for storing an int value as an IntWritable. This class complements class
- * LazyInteger. It's primary difference is the {@link #init(ByteArrayRef, int, int)} method, which
+ * LazyInteger. Its primary difference is the {@link #init(byte[], int, int)} method, which
  * reads the integer value stored from the default binary format.
  */
 public class LazyDioInteger extends LazyInteger {
@@ -52,12 +51,12 @@ public LazyDioInteger(LazyDioInteger copy) {
    * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int)
    */
   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {

     int value = 0;

     try {
-      in = new ByteStream.Input(bytes.getData(), start, length);
+      in = new ByteStream.Input(bytes, start, length);
       din = new DataInputStream(in);
       value = din.readInt();
       data.set(value);
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioLong.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioLong.java
index 98c7058..3d03480 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioLong.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioLong.java
@@ -22,13 +22,12 @@
 import java.io.IOException;

 import org.apache.hadoop.hive.serde2.ByteStream;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyLong;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyLongObjectInspector;

 /**
  * LazyLongBinary for storing a long value as a LongWritable. This class complements class
- * LazyLong. It's primary difference is the {@link #init(ByteArrayRef, int, int)} method, which
+ * LazyLong. Its primary difference is the {@link #init(byte[], int, int)} method, which
  * reads the long value stored in the default binary format.
  */
 public class LazyDioLong extends LazyLong {
@@ -52,12 +51,12 @@ public LazyDioLong(LazyDioLong copy) {
    * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int)
    */
   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {

     long value = 0;

     try {
-      in = new ByteStream.Input(bytes.getData(), start, length);
+      in = new ByteStream.Input(bytes, start, length);
       din = new DataInputStream(in);
       value = din.readLong();
       data.set(value);
diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioShort.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioShort.java
index 137d0c8..b175766 100644
--- serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioShort.java
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioShort.java
@@ -22,13 +22,12 @@
 import java.io.IOException;

 import org.apache.hadoop.hive.serde2.ByteStream;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyShort;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyShortObjectInspector;

 /**
  * LazyShortBinary for storing a short value as a ShortWritable. This class complements class
- * LazyShort. It's primary difference is the {@link #init(ByteArrayRef, int, int)} method, which
+ * LazyShort. Its primary difference is the {@link #init(byte[], int, int)} method, which
  * reads the short value stored from the default binary format.
  */
 public class LazyDioShort extends LazyShort {
@@ -52,12 +51,12 @@ public LazyDioShort(LazyDioShort copy) {
    * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int)
    */
   @Override
-  public void init(ByteArrayRef bytes, int start, int length) {
+  public void init(byte[] bytes, int start, int length) {

     short value = 0;

     try {
-      in = new ByteStream.Input(bytes.getData(), start, length);
+      in = new ByteStream.Input(bytes, start, length);
       din = new DataInputStream(in);
       value = din.readShort();
       data.set(value);
diff --git serde/src/test/org/apache/hadoop/hive/serde2/Base64WriteOnly.java serde/src/test/org/apache/hadoop/hive/serde2/Base64WriteOnly.java
new file mode 100644
index 0000000..9c11001
--- /dev/null
+++ serde/src/test/org/apache/hadoop/hive/serde2/Base64WriteOnly.java
@@ -0,0 +1,36 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2;
+
+import org.apache.commons.codec.binary.Base64;
+
+import java.io.IOException;
+
+public class Base64WriteOnly implements FieldRewriter {
+
+  @Override
+  public void serialize(ByteStream.Input input, ByteStream.Output output) throws IOException {
+    output.write(Base64.encodeBase64(input.toBytes()));
+  }
+
+  @Override
+  public void deserialize(ByteStream.Input input, ByteStream.Output output) throws IOException {
+    output.write(input.toBytes());
+  }
+}
diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyPrimitive.java serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyPrimitive.java
index 868dd4c..40a0a81 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyPrimitive.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyPrimitive.java
@@ -44,9 +44,7 @@
    */
   public static void initLazyObject(LazyObject lo, byte[] data, int start,
       int length) {
-    ByteArrayRef b = new ByteArrayRef();
-    b.setData(data);
-    lo.init(b, start, length);
+    lo.init(data, start, length);
   }

   /**
diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java
index 69c891d..3f5491a 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java
@@ -37,7 +37,6 @@
 import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass;
 import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct;
 import org.apache.hadoop.hive.serde2.binarysortable.TestBinarySortableSerDe;
-import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyBinary;
 import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector;
@@ -48,7 +47,6 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector;
@@ -600,15 +598,12 @@ public void testWritableBinaryObjectInspector() throws Throwable {
   public void testLazyBinaryObjectInspector() throws Throwable {

     //create input ByteArrayRef
-    ByteArrayRef inpBARef = new ByteArrayRef();
-    inpBARef.setData(inpBArray);
-
     AbstractPrimitiveLazyObjectInspector binInspector = LazyPrimitiveObjectInspectorFactory
     .getLazyObjectInspector(TypeInfoFactory.binaryTypeInfo, false, (byte)0);

     //create LazyBinary initialed with inputBA
     LazyBinary lazyBin = (LazyBinary) LazyFactory.createLazyObject(binInspector);
-    lazyBin.init(inpBARef, 0, inpBArray.length);
+    lazyBin.init(inpBArray, 0, inpBArray.length);

     //use inspector to get a byte[] out of LazyBinary
     byte[] outBARef = (byte[]) binInspector.getPrimitiveJavaObject(lazyBin);
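
Taken together, the hunks above make one mechanical change across the lazy and lazy-binary readers: LazyObjectBase#init now receives the backing byte[] directly instead of a ByteArrayRef wrapper, so every reader loses a getData() indirection and every caller loses a per-buffer wrapper allocation. The sketch below is illustrative only and not part of the patch; it mirrors the updated testLazyBinaryObjectInspector hunk above, and the class and package names are invented for the example.

package org.apache.hadoop.hive.serde2.example; // hypothetical package, illustration only

import java.util.Arrays;

import org.apache.hadoop.hive.serde2.lazy.LazyBinary;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class InitSignatureSketch {
  public static void main(String[] args) {
    byte[] buffer = {0x01, 0x02, 0x03};

    // Inspector and lazy-object creation are untouched by the patch.
    AbstractPrimitiveLazyObjectInspector binInspector = LazyPrimitiveObjectInspectorFactory
        .getLazyObjectInspector(TypeInfoFactory.binaryTypeInfo, false, (byte) 0);
    LazyBinary lazyBin = (LazyBinary) LazyFactory.createLazyObject(binInspector);

    // Before the patch, each buffer needed a wrapper object:
    //   ByteArrayRef ref = new ByteArrayRef();
    //   ref.setData(buffer);
    //   lazyBin.init(ref, 0, buffer.length);

    // After the patch, the backing array is handed over directly.
    lazyBin.init(buffer, 0, buffer.length);

    byte[] out = (byte[]) binInspector.getPrimitiveJavaObject(lazyBin);
    System.out.println(Arrays.equals(buffer, out)); // prints "true"
  }
}

The caller-visible gain is the commented-out block: the three-line ByteArrayRef dance per record collapses into a single init call on the array itself.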