diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java index 5008f15..302844d 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java +++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java @@ -20,10 +20,7 @@ import java.util.ArrayList; import java.util.List; -import java.util.Properties; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; import org.apache.hadoop.hive.serde2.lazy.LazyObject; import org.apache.hadoop.hive.serde2.lazy.LazyStruct; @@ -87,12 +84,8 @@ public HBaseCompositeKey(LazySimpleStructObjectInspector oi) { LazyObject lazyObject = LazyFactory .createLazyObject(fieldOI); - ByteArrayRef ref = new ByteArrayRef(); - - ref.setData(bytes); - // initialize the lazy object - lazyObject.init(ref, 0, ref.getData().length); + lazyObject.init(bytes, 0, bytes.length); return lazyObject; } diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java index 5fe35a5..1ee7d32 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java +++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java @@ -28,10 +28,12 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.FieldRewritable; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazy.LazyUtils; import 
org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; @@ -53,7 +55,7 @@ * HBaseSerDe can be used to serialize object into an HBase table and * deserialize objects from an HBase table. */ -public class HBaseSerDe extends AbstractSerDe { +public class HBaseSerDe extends AbstractSerDe implements FieldRewritable { public static final Log LOG = LogFactory.getLog(HBaseSerDe.class); public static final String HBASE_COLUMNS_MAPPING = "hbase.columns.mapping"; @@ -111,6 +113,7 @@ public void initialize(Configuration conf, Properties tbl) serdeParams.getEscapeChar()); cachedHBaseRow = new LazyHBaseRow((LazySimpleStructObjectInspector) cachedObjectInspector); + cachedHBaseRow.setSerdeParams(serdeParams.getSerdeParams()); if (serdeParams.getCompositeKeyClass() != null) { // initialize the constructor of the composite key class with its object inspector @@ -334,20 +337,21 @@ public Writable serialize(Object obj, ObjectInspector objInspector) return null; } + ByteStream.Output out = new ByteStream.Output(); boolean writeBinary = serdeParams.getKeyColumnMapping().binaryStorage.get(0); ObjectInspector keyFieldOI = keyField.getFieldObjectInspector(); if (!keyFieldOI.getCategory().equals(Category.PRIMITIVE) && declaredKeyField.getFieldObjectInspector().getCategory().equals(Category.PRIMITIVE)) { // we always serialize the String type using the escaped algorithm for LazyString - return serialize(SerDeUtils.getJSONString(keyValue, keyFieldOI), - PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false, serdeParams); + serialize(SerDeUtils.getJSONString(keyValue, keyFieldOI), + PrimitiveObjectInspectorFactory.javaStringObjectInspector, -1, 1, false, serdeParams, out); } else { // use the serialization option switch to write primitive values as either a variable // length UTF8 string or a fixed width bytes if serializing in binary format - return serialize(keyValue, 
keyFieldOI, 1, writeBinary, serdeParams); + serialize(keyValue, keyFieldOI, -1, 1, writeBinary, serdeParams, out); } - + return out.toByteArray(); } private void serializeField(Put put, StructField field, Object value, @@ -357,6 +361,9 @@ private void serializeField(Put put, StructField field, Object value, return; } + ByteStream.Output out = new ByteStream.Output(); + + int fieldID = field.getFieldID(); // Get the field objectInspector and the field object. ObjectInspector foi = field.getFieldObjectInspector(); @@ -371,55 +378,55 @@ private void serializeField(Put put, StructField field, Object value, return; } else { for (Map.Entry entry: map.entrySet()) { + out.reset(); // Get the Key + Object mkey = entry.getKey(); + boolean keyBinary = colMap.binaryStorage.get(0); // Map keys are required to be primitive and may be serialized in binary format - byte[] columnQualifierBytes = serialize(entry.getKey(), koi, 3, colMap.binaryStorage.get(0), serdeParams); - if (columnQualifierBytes == null) { + if (!serialize(mkey, koi, fieldID, 3, keyBinary, serdeParams, out)) { continue; } + // Get the column-qualifier + byte[] columnQualifierBytes = out.toByteArray(); + out.reset(); + Object mvalue = entry.getValue(); + boolean valueBinary = colMap.binaryStorage.get(1); // Map values may be serialized in binary format when they are primitive and binary // serialization is the option selected - byte[] bytes = serialize(entry.getValue(), voi, 3, colMap.binaryStorage.get(1), serdeParams); - if (bytes == null) { + if (!serialize(mvalue, voi, fieldID, 3, valueBinary, serdeParams, out)) { continue; } - - put.add(colMap.familyNameBytes, columnQualifierBytes, bytes); + put.add(colMap.familyNameBytes, columnQualifierBytes, out.toByteArray()); } } } else { - byte[] bytes = null; + out.reset(); // If the field that is passed in is NOT a primitive, and either the // field is not declared (no schema was given at initialization), or // the field is declared as a primitive in initialization, 
serialize // the data to JSON string. Otherwise serialize the data in the // delimited way. + boolean serialized; if (!foi.getCategory().equals(Category.PRIMITIVE) && declaredField.getFieldObjectInspector().getCategory().equals(Category.PRIMITIVE)) { // we always serialize the String type using the escaped algorithm for LazyString - bytes = serialize(SerDeUtils.getJSONString(value, foi), - PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false, serdeParams); + serialized = serialize(SerDeUtils.getJSONString(value, foi), + PrimitiveObjectInspectorFactory.javaStringObjectInspector, fieldID, 1, false, serdeParams, out); } else { // use the serialization option switch to write primitive values as either a variable // length UTF8 string or a fixed width bytes if serializing in binary format - bytes = serialize(value, foi, 1, colMap.binaryStorage.get(0), serdeParams); + serialized = serialize(value, foi, fieldID, 1, colMap.binaryStorage.get(0), serdeParams, out); } - if (bytes == null) { + if (!serialized) { return; } - put.add(colMap.familyNameBytes, colMap.qualifierNameBytes, bytes); + put.add(colMap.familyNameBytes, colMap.qualifierNameBytes, out.toByteArray()); } } - private static byte[] getBytesFromStream(ByteStream.Output ss) { - byte [] buf = new byte[ss.getCount()]; - System.arraycopy(ss.getData(), 0, buf, 0, ss.getCount()); - return buf; - } - /* * Serialize the row into a ByteStream. * @@ -431,18 +438,21 @@ private void serializeField(Put put, StructField field, Object value, * @throws IOException On error in writing to the serialization stream. * @return true On serializing a non-null object, otherwise false. 
*/ - private static byte[] serialize(Object obj, ObjectInspector objInspector, int level, - boolean writeBinary, HBaseSerDeParameters serdeParams) throws IOException { - ByteStream.Output ss = new ByteStream.Output(); + private static boolean serialize(Object obj, ObjectInspector objInspector, int index, int level, + boolean writeBinary, HBaseSerDeParameters hbaseParam, ByteStream.Output out) throws IOException { + int pos = out.getCount(); + boolean serialized; if (objInspector.getCategory() == Category.PRIMITIVE && writeBinary) { - LazyUtils.writePrimitive(ss, obj, (PrimitiveObjectInspector) objInspector); + LazyUtils.writePrimitive(out, obj, (PrimitiveObjectInspector) objInspector); + serialized = true; } else { - if (false == serialize(obj, objInspector, level, serdeParams, ss)) { - return null; - } + serialized = serialize(obj, objInspector, level, hbaseParam, out); } - - return getBytesFromStream(ss); + LazySimpleSerDe.SerDeParameters serdeParam = hbaseParam.getSerdeParams(); + if (serialized && serdeParam.isEncoded(index)) { + serdeParam.encode(index, out, pos); + } + return serialized; } private static boolean serialize( @@ -545,8 +555,7 @@ private void initCompositeKeyClass(Configuration conf,Properties tbl) throws Ser try { compositeKeyObj = serdeParams.getCompositeKeyClass().getDeclaredConstructor( LazySimpleStructObjectInspector.class, Properties.class, Configuration.class) - .newInstance( - ((LazySimpleStructObjectInspector) keyObjectInspector), tbl, conf); + .newInstance(keyObjectInspector, tbl, conf); } catch (IllegalArgumentException e) { throw new SerDeException(e); } catch (SecurityException e) { diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java index b64590d..742c1c0 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java +++ 
hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java @@ -124,6 +124,10 @@ void init(Configuration job, Properties tbl, String serdeName) throws SerDeExcep } } + if (serdeParams.isEncoded(getRowKeyColumnOffset(columnMapping))) { + throw new SerDeException("row key column cannot be encoded"); + } + // Precondition: make sure this is done after the rest of the SerDe initialization is done. String hbaseTableStorageType = tbl.getProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE); parseColumnStorageTypes(hbaseTableStorageType); diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java index cedef10..10651fe 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java +++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.hbase; +import java.io.IOException; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -26,11 +27,11 @@ import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; import org.apache.hadoop.hive.serde2.lazy.LazyMap; import org.apache.hadoop.hive.serde2.lazy.LazyObject; import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; +import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -46,12 +47,20 @@ private byte[] qualPrefix; private List binaryStorage; + private int index; + private LazySimpleSerDe.SerDeParameters serdeParams; + /** * Construct a LazyCellMap object with the ObjectInspector. 
* @param oi */ - public LazyHBaseCellMap(LazyMapObjectInspector oi) { + public LazyHBaseCellMap(int index, LazyMapObjectInspector oi) { super(oi); + this.index = index; + } + + public void setSerdeParams(LazySimpleSerDe.SerDeParameters serdeParams) { + this.serdeParams = serdeParams; } public void init( @@ -105,18 +114,26 @@ private void parse() { (PrimitiveObjectInspector) lazyMoi.getMapKeyObjectInspector(), binaryStorage.get(0)); - ByteArrayRef keyRef = new ByteArrayRef(); - keyRef.setData(e.getKey()); - key.init(keyRef, 0, keyRef.getData().length); + byte[] bkey = e.getKey(); + key.init(bkey, 0, bkey.length); // Value LazyObject value = LazyFactory.createLazyObject(lazyMoi.getMapValueObjectInspector(), binaryStorage.get(1)); - ByteArrayRef valueRef = new ByteArrayRef(); - valueRef.setData(e.getValue()); - value.init(valueRef, 0, valueRef.getData().length); + byte[] bvalue = e.getValue(); + int length = bvalue.length; + if (serdeParams != null && serdeParams.rewriter != null) { + try { + serdeParams.decode(index, bvalue, 0, length); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + bvalue = serdeParams.output.getData(); + length = serdeParams.output.getCount(); + } + value.init(bvalue, 0, length); // Put the key/value into the map cachedMap.put(key.getObject(), value.getObject()); diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java index fc40195..397746d 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java +++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java @@ -18,13 +18,13 @@ package org.apache.hadoop.hive.hbase; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hive.hbase.HBaseSerDe.ColumnMapping; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import 
org.apache.hadoop.hive.serde2.lazy.LazyFactory; import org.apache.hadoop.hive.serde2.lazy.LazyObject; import org.apache.hadoop.hive.serde2.lazy.LazyStruct; @@ -32,7 +32,6 @@ import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; /** * LazyObject for storing an HBase row. The field of an HBase row can be @@ -86,8 +85,7 @@ public void init(Result r, List columnsMapping, Object compositeK private void parse() { if (getFields() == null) { - List fieldRefs = - ((StructObjectInspector)getInspector()).getAllStructFieldRefs(); + List fieldRefs = getInspector().getAllStructFieldRefs(); LazyObject [] fields = new LazyObject[fieldRefs.size()]; for (int i = 0; i < fields.length; i++) { @@ -95,8 +93,11 @@ private void parse() { if (colMap.qualifierName == null && !colMap.hbaseRowKey) { // a column family - fields[i] = new LazyHBaseCellMap( + fields[i] = new LazyHBaseCellMap(i, (LazyMapObjectInspector) fieldRefs.get(i).getFieldObjectInspector()); + if (serdeParams != null && serdeParams.isEncoded(i)) { + ((LazyHBaseCellMap)fields[i]).setSerdeParams(serdeParams); + } continue; } @@ -153,12 +154,11 @@ private Object uncheckedGetField(int fieldID) { boolean [] fieldsInited = getFieldInited(); if (!fieldsInited[fieldID]) { - ByteArrayRef ref = null; ColumnMapping colMap = columnsMapping.get(fieldID); + byte[] bytes = null; if (colMap.hbaseRowKey) { - ref = new ByteArrayRef(); - ref.setData(result.getRow()); + bytes = result.getRow(); } else { if (colMap.qualifierName == null) { // it is a column family @@ -169,24 +169,31 @@ private Object uncheckedGetField(int fieldID) { result, colMap.familyNameBytes, colMap.binaryStorage, colMap.qualifierPrefixBytes); } else { // it is a column i.e. 
a column-family with column-qualifier - byte [] res = result.getValue(colMap.familyNameBytes, colMap.qualifierNameBytes); - - if (res == null) { + bytes = result.getValue(colMap.familyNameBytes, colMap.qualifierNameBytes); + if (bytes == null) { return null; - } else { - ref = new ByteArrayRef(); - ref.setData(res); } } } + int length = bytes == null ? 0 : bytes.length; + if (bytes != null && serdeParams != null && serdeParams.isEncoded(fieldID)) { + try { + serdeParams.decode(fieldID, bytes, 0, bytes.length); + } catch (IOException e) { + throw new RuntimeException(e); + } + bytes = serdeParams.output.getData(); + length = serdeParams.output.getCount(); + } + - if (ref != null) { - fields[fieldID].init(ref, 0, ref.getData().length); + if (bytes != null) { + fields[fieldID].init(bytes, 0, length); // if it was a row key and we have been provided a custom composite key class, initialize it // with the bytes for the row key if (colMap.hbaseRowKey && compositeKeyObj != null) { - ((LazyStruct) compositeKeyObj).init(ref, 0, ref.getData().length); + ((LazyStruct) compositeKeyObj).init(bytes, 0, length); } } } diff --git hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestCompositeKey.java hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestCompositeKey.java index 13c344b..15797c4 100644 --- hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestCompositeKey.java +++ hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestCompositeKey.java @@ -18,11 +18,11 @@ package org.apache.hadoop.hive.hbase; +import java.util.Arrays; import java.util.Properties; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.util.Bytes; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; public class HBaseTestCompositeKey extends HBaseCompositeKey { @@ -39,8 +39,8 @@ public HBaseTestCompositeKey(LazySimpleStructObjectInspector oi, Properties tbl, } 
@Override - public void init(ByteArrayRef bytes, int start, int length) { - this.bytes = bytes.getData(); + public void init(byte[] bytes, int start, int length) { + this.bytes = Arrays.copyOfRange(bytes, start, start + length); } @Override diff --git hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestLazyHBaseObject.java hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestLazyHBaseObject.java index 7c4fc9f..93fc2ae 100644 --- hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestLazyHBaseObject.java +++ hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestLazyHBaseObject.java @@ -66,7 +66,7 @@ public void testLazyHBaseCellMap1() throws SerDeException { TypeInfoUtils.getTypeInfosFromTypeString("map").get(0), new byte[]{(byte)1, (byte)2}, 0, nullSequence, false, (byte)0); - LazyHBaseCellMap b = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + LazyHBaseCellMap b = new LazyHBaseCellMap(0, (LazyMapObjectInspector) oi); // Initialize a result List kvs = new ArrayList(); @@ -128,7 +128,7 @@ public void testLazyHBaseCellMap2() throws SerDeException { TypeInfoUtils.getTypeInfosFromTypeString("map").get(0), new byte[]{(byte)'#', (byte)'\t'}, 0, nullSequence, false, (byte)0); - LazyHBaseCellMap b = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + LazyHBaseCellMap b = new LazyHBaseCellMap(0, (LazyMapObjectInspector) oi); // Initialize a result List kvs = new ArrayList(); @@ -190,7 +190,7 @@ public void testLazyHBaseCellMap3() throws SerDeException { TypeInfo mapBinaryIntKeyValue = TypeInfoUtils.getTypeInfoFromTypeString("map"); ObjectInspector oi = LazyFactory.createLazyObjectInspector( mapBinaryIntKeyValue, new byte [] {(byte)1, (byte) 2}, 0, nullSequence, false, (byte) 0); - LazyHBaseCellMap hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + LazyHBaseCellMap hbaseCellMap = new LazyHBaseCellMap(0, (LazyMapObjectInspector) oi); List kvs = new ArrayList(); byte [] rowKey = "row-key".getBytes(); @@ -233,7 +233,7 @@ public void 
testLazyHBaseCellMap3() throws SerDeException { TypeInfoUtils.getTypeInfoFromTypeString("map"); oi = LazyFactory.createLazyObjectInspector( mapBinaryByteKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, (byte) 0); - hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + hbaseCellMap = new LazyHBaseCellMap(0, (LazyMapObjectInspector) oi); byte [] cfByte = "cf-byte".getBytes(); kvs.clear(); kvs.add(new KeyValue(rowKey, cfByte, new byte [] {(byte) 1}, new byte [] {(byte) 1})); @@ -271,7 +271,7 @@ public void testLazyHBaseCellMap3() throws SerDeException { TypeInfoUtils.getTypeInfoFromTypeString("map"); oi = LazyFactory.createLazyObjectInspector( mapBinaryShortKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, (byte) 0); - hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + hbaseCellMap = new LazyHBaseCellMap(0, (LazyMapObjectInspector) oi); byte [] cfShort = "cf-short".getBytes(); kvs.clear(); kvs.add(new KeyValue(rowKey, cfShort, Bytes.toBytes((short) 1), Bytes.toBytes((short) 1))); @@ -309,7 +309,7 @@ public void testLazyHBaseCellMap3() throws SerDeException { TypeInfoUtils.getTypeInfoFromTypeString("map"); oi = LazyFactory.createLazyObjectInspector( mapBinaryLongKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, (byte) 0); - hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + hbaseCellMap = new LazyHBaseCellMap(0, (LazyMapObjectInspector) oi); byte [] cfLong = "cf-long".getBytes(); kvs.clear(); kvs.add(new KeyValue(rowKey, cfLong, Bytes.toBytes((long) 1), Bytes.toBytes((long) 1))); @@ -348,7 +348,7 @@ public void testLazyHBaseCellMap3() throws SerDeException { oi = LazyFactory.createLazyObjectInspector( mapBinaryFloatKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, (byte) 0); - hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + hbaseCellMap = new LazyHBaseCellMap(0, (LazyMapObjectInspector) oi); byte [] cfFloat = 
"cf-float".getBytes(); kvs.clear(); kvs.add(new KeyValue(rowKey, cfFloat, Bytes.toBytes((float) 1.0F), @@ -388,7 +388,7 @@ public void testLazyHBaseCellMap3() throws SerDeException { oi = LazyFactory.createLazyObjectInspector( mapBinaryDoubleKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, (byte) 0); - hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + hbaseCellMap = new LazyHBaseCellMap(0, (LazyMapObjectInspector) oi); byte [] cfDouble = "cf-double".getBytes(); kvs.clear(); kvs.add(new KeyValue(rowKey, cfDouble, Bytes.toBytes(1.0), Bytes.toBytes(1.0))); @@ -427,7 +427,7 @@ public void testLazyHBaseCellMap3() throws SerDeException { oi = LazyFactory.createLazyObjectInspector( mapBinaryBooleanKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, (byte) 0); - hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); + hbaseCellMap = new LazyHBaseCellMap(0, (LazyMapObjectInspector) oi); byte [] cfBoolean = "cf-boolean".getBytes(); kvs.clear(); kvs.add(new KeyValue(rowKey, cfBoolean, Bytes.toBytes(false), Bytes.toBytes(false))); diff --git hbase-handler/src/test/queries/positive/hbase_column_encoding.q hbase-handler/src/test/queries/positive/hbase_column_encoding.q new file mode 100644 index 0000000..b52fcc1 --- /dev/null +++ hbase-handler/src/test/queries/positive/hbase_column_encoding.q @@ -0,0 +1,16 @@ +create table encode_test1(id INT, name STRING, phone STRING, address STRING) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( +'hbase.columns.mapping' = ':key,private:name,private:phone,private:address', +'column.encode.columns'='phone,address', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64WriteOnly'); + +create table encode_test2(id INT, name STRING, phone STRING, address STRING) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( +'hbase.columns.mapping' = ':key,private:name,private:phone,private:address', 
+'column.encode.indices'='2,3', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64Rewriter'); + +from src tablesample (2 rows) +insert into table encode_test1 select key,'navis',concat('010-0000-', key), concat('Seoul.', value) +insert into table encode_test2 select key,'navis',concat('010-0000-', key), concat('Seoul.', value); + +select * from encode_test1; +select * from encode_test2; diff --git hbase-handler/src/test/results/positive/hbase_column_encoding.q.out hbase-handler/src/test/results/positive/hbase_column_encoding.q.out new file mode 100644 index 0000000..269eae1 --- /dev/null +++ hbase-handler/src/test/results/positive/hbase_column_encoding.q.out @@ -0,0 +1,60 @@ +PREHOOK: query: create table encode_test1(id INT, name STRING, phone STRING, address STRING) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( +'hbase.columns.mapping' = ':key,private:name,private:phone,private:address', +'column.encode.columns'='phone,address', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64WriteOnly') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table encode_test1(id INT, name STRING, phone STRING, address STRING) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( +'hbase.columns.mapping' = ':key,private:name,private:phone,private:address', +'column.encode.columns'='phone,address', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64WriteOnly') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@encode_test1 +PREHOOK: query: create table encode_test2(id INT, name STRING, phone STRING, address STRING) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( +'hbase.columns.mapping' = ':key,private:name,private:phone,private:address', +'column.encode.indices'='2,3', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64Rewriter') +PREHOOK: type: 
CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table encode_test2(id INT, name STRING, phone STRING, address STRING) +STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( +'hbase.columns.mapping' = ':key,private:name,private:phone,private:address', +'column.encode.indices'='2,3', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64Rewriter') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@encode_test2 +PREHOOK: query: from src tablesample (2 rows) +insert into table encode_test1 select key,'navis',concat('010-0000-', key), concat('Seoul.', value) +insert into table encode_test2 select key,'navis',concat('010-0000-', key), concat('Seoul.', value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@encode_test1 +PREHOOK: Output: default@encode_test2 +POSTHOOK: query: from src tablesample (2 rows) +insert into table encode_test1 select key,'navis',concat('010-0000-', key), concat('Seoul.', value) +insert into table encode_test2 select key,'navis',concat('010-0000-', key), concat('Seoul.', value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@encode_test1 +POSTHOOK: Output: default@encode_test2 +PREHOOK: query: select * from encode_test1 +PREHOOK: type: QUERY +PREHOOK: Input: default@encode_test1 +#### A masked pattern was here #### +POSTHOOK: query: select * from encode_test1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encode_test1 +#### A masked pattern was here #### +238 navis MDEwLTAwMDAtMjM4 U2VvdWwudmFsXzIzOA== +86 navis MDEwLTAwMDAtODY= U2VvdWwudmFsXzg2 +PREHOOK: query: select * from encode_test2 +PREHOOK: type: QUERY +PREHOOK: Input: default@encode_test2 +#### A masked pattern was here #### +POSTHOOK: query: select * from encode_test2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encode_test2 +#### A masked pattern was here #### +238 navis 010-0000-238 Seoul.val_238 +86 navis 010-0000-86 
Seoul.val_86 diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java index c4268c1..d30d387 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcSerde.java @@ -27,7 +27,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedSerde; -import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -40,7 +40,7 @@ * A serde class for ORC. * It transparently passes the object to/from the ORC file reader/writer. */ -public class OrcSerde implements SerDe, VectorizedSerde { +public class OrcSerde extends AbstractSerDe implements VectorizedSerde { private static final Log LOG = LogFactory.getLog(OrcSerde.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java index 293b74e..29ded67 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcStruct.java @@ -166,6 +166,11 @@ public ObjectInspector getFieldObjectInspector() { } @Override + public int getFieldID() { + return offset; + } + + @Override public String getFieldComment() { return null; } diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java index bb02bab..5fe3858 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java @@ -133,7 +133,7 @@ public Object 
getStructFieldData(final Object data, final StructField fieldRef) if (data instanceof ArrayWritable) { final ArrayWritable arr = (ArrayWritable) data; - return arr.get()[((StructFieldImpl) fieldRef).getIndex()]; + return arr.get()[((StructFieldImpl) fieldRef).getFieldID()]; } throw new UnsupportedOperationException("Cannot inspect " + data.getClass().getCanonicalName()); @@ -171,7 +171,7 @@ public Object create() { @Override public Object setStructFieldData(Object struct, StructField field, Object fieldValue) { final ArrayList list = (ArrayList) struct; - list.set(((StructFieldImpl) field).getIndex(), fieldValue); + list.set(((StructFieldImpl) field).getFieldID(), fieldValue); return list; } @@ -219,7 +219,7 @@ public String getFieldName() { return name; } - public int getIndex() { + public int getFieldID() { return index; } diff --git ql/src/test/queries/clientpositive/column_encoding.q ql/src/test/queries/clientpositive/column_encoding.q new file mode 100644 index 0000000..413e240 --- /dev/null +++ ql/src/test/queries/clientpositive/column_encoding.q @@ -0,0 +1,16 @@ +create table encode_test1(id INT, name STRING, phone STRING, address STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ('column.encode.columns'='phone,address', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64WriteOnly') +STORED AS TEXTFILE; + +create table encode_test2(id INT, name STRING, phone STRING, address STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ('column.encode.indices'='2,3', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64Rewriter') +STORED AS TEXTFILE; + +from src tablesample (2 rows) +insert into table encode_test1 select key,'navis',concat('010-0000-', key), concat('Seoul.', value) +insert into table encode_test2 select key,'navis',concat('010-0000-', key), concat('Seoul.', value); + +select * from encode_test1; +select * from encode_test2; diff 
--git ql/src/test/results/clientpositive/column_encoding.q.out ql/src/test/results/clientpositive/column_encoding.q.out new file mode 100644 index 0000000..4589c07 --- /dev/null +++ ql/src/test/results/clientpositive/column_encoding.q.out @@ -0,0 +1,84 @@ +PREHOOK: query: create table encode_test1(id INT, name STRING, phone STRING, address STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ('column.encode.columns'='phone,address', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64WriteOnly') +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table encode_test1(id INT, name STRING, phone STRING, address STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ('column.encode.columns'='phone,address', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64WriteOnly') +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@encode_test1 +PREHOOK: query: create table encode_test2(id INT, name STRING, phone STRING, address STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ('column.encode.indices'='2,3', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64Rewriter') +STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: create table encode_test2(id INT, name STRING, phone STRING, address STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ('column.encode.indices'='2,3', 'column.encode.classname'='org.apache.hadoop.hive.serde2.Base64Rewriter') +STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@encode_test2 +PREHOOK: query: from src tablesample (2 rows) +insert into table encode_test1 select key,'navis',concat('010-0000-', key), 
concat('Seoul.', value) +insert into table encode_test2 select key,'navis',concat('010-0000-', key), concat('Seoul.', value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@encode_test1 +PREHOOK: Output: default@encode_test2 +POSTHOOK: query: from src tablesample (2 rows) +insert into table encode_test1 select key,'navis',concat('010-0000-', key), concat('Seoul.', value) +insert into table encode_test2 select key,'navis',concat('010-0000-', key), concat('Seoul.', value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@encode_test1 +POSTHOOK: Output: default@encode_test2 +POSTHOOK: Lineage: encode_test1.address EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test1.id EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test1.name SIMPLE [] +POSTHOOK: Lineage: encode_test1.phone EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.address EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.id EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.name SIMPLE [] +POSTHOOK: Lineage: encode_test2.phone EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select * from encode_test1 +PREHOOK: type: QUERY +PREHOOK: Input: default@encode_test1 +#### A masked pattern was here #### +POSTHOOK: query: select * from encode_test1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encode_test1 +#### A masked pattern was here #### +POSTHOOK: Lineage: encode_test1.address EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test1.id EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test1.name SIMPLE [] +POSTHOOK: 
Lineage: encode_test1.phone EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.address EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.id EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.name SIMPLE [] +POSTHOOK: Lineage: encode_test2.phone EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +238 navis MDEwLTAwMDAtMjM4 U2VvdWwudmFsXzIzOA== +86 navis MDEwLTAwMDAtODY= U2VvdWwudmFsXzg2 +PREHOOK: query: select * from encode_test2 +PREHOOK: type: QUERY +PREHOOK: Input: default@encode_test2 +#### A masked pattern was here #### +POSTHOOK: query: select * from encode_test2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@encode_test2 +#### A masked pattern was here #### +POSTHOOK: Lineage: encode_test1.address EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test1.id EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test1.name SIMPLE [] +POSTHOOK: Lineage: encode_test1.phone EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.address EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.id EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: encode_test2.name SIMPLE [] +POSTHOOK: Lineage: encode_test2.phone EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +238 navis 010-0000-238 Seoul.val_238 +86 navis 010-0000-86 Seoul.val_86 diff --git serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java index 515cf25..18b0016 100644 --- 
serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java +++ serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java @@ -61,6 +61,12 @@ public static final String ESCAPE_CHAR = "escape.delim"; + public static final String COLUMN_ENCODE_COLUMNS = "column.encode.columns"; + + public static final String COLUMN_ENCODE_INDICES = "column.encode.indices"; + + public static final String COLUMN_ENCODE_CLASSNAME = "column.encode.classname"; + public static final String HEADER_COUNT = "skip.header.line.count"; public static final String FOOTER_COUNT = "skip.footer.line.count"; diff --git serde/src/java/org/apache/hadoop/hive/serde2/AbstractFieldRewriter.java serde/src/java/org/apache/hadoop/hive/serde2/AbstractFieldRewriter.java new file mode 100644 index 0000000..d6a0fcf --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/AbstractFieldRewriter.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.serde2; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +import java.io.IOException; +import java.util.List; +import java.util.Properties; + +public class AbstractFieldRewriter implements FieldRewriter { + + @Override + public void init(List columnNames, List columnTypes, Properties properties) + throws IOException { + } + + @Override + public void encode(int index, ByteStream.Input input, ByteStream.Output output) + throws IOException { + output.write(input.toBytes()); + } + + @Override + public void decode(int index, ByteStream.Input input, ByteStream.Output output) + throws IOException { + output.write(input.toBytes()); + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/Base64Rewriter.java serde/src/java/org/apache/hadoop/hive/serde2/Base64Rewriter.java new file mode 100644 index 0000000..f0eefce --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/Base64Rewriter.java @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.serde2; + +import org.apache.commons.codec.binary.Base64; + +import java.io.IOException; + +public class Base64Rewriter extends AbstractFieldRewriter { + + @Override + public void encode(int index, ByteStream.Input input, ByteStream.Output output) + throws IOException { + output.write(Base64.encodeBase64(input.toBytes())); + } + + @Override + public void decode(int index, ByteStream.Input input, ByteStream.Output output) + throws IOException { + output.write(Base64.decodeBase64(input.toBytes())); + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/Base64WriteOnly.java serde/src/java/org/apache/hadoop/hive/serde2/Base64WriteOnly.java new file mode 100644 index 0000000..2e8813c --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/Base64WriteOnly.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.serde2; + +import org.apache.commons.codec.binary.Base64; + +import java.io.IOException; + +public class Base64WriteOnly extends AbstractFieldRewriter { + + @Override + public void encode(int index, ByteStream.Input input, ByteStream.Output output) + throws IOException { + output.write(Base64.encodeBase64(input.toBytes())); + } +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java index 73d9b29..c4592b9 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java +++ serde/src/java/org/apache/hadoop/hive/serde2/ByteStream.java @@ -21,6 +21,8 @@ import org.apache.hadoop.hive.common.io.NonSyncByteArrayInputStream; import org.apache.hadoop.hive.common.io.NonSyncByteArrayOutputStream; +import java.util.Arrays; + /** * Extensions to bytearrayinput/output streams. * @@ -45,6 +47,10 @@ public void reset(byte[] argBuf, int argCount) { count = argCount; } + public byte[] toBytes() { + return Arrays.copyOfRange(buf, pos, count); + } + public Input() { super(new byte[1]); } @@ -79,5 +85,10 @@ public Output() { public Output(int size) { super(size); } + + public void writeTo(int pos, Output output) { + count = pos; + write(output.buf, 0, output.count); + } } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/DelimitedJSONSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/DelimitedJSONSerDe.java index 179f9b5..becae0a 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/DelimitedJSONSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/DelimitedJSONSerDe.java @@ -18,10 +18,9 @@ package org.apache.hadoop.hive.serde2; -import java.io.IOException; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -29,6 +28,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.io.Writable; +import java.util.Properties; + /** * DelimitedJSONSerDe. * @@ -42,6 +43,14 @@ public DelimitedJSONSerDe() throws SerDeException { } + private String nullStr; + + @Override + public void initialize(Configuration job, Properties tbl) + throws SerDeException { + super.initialize(job, tbl); + nullStr = serdeParams.getNullSequence().toString(); + } /** * Not implemented. */ @@ -53,22 +62,13 @@ public Object deserialize(Writable field) throws SerDeException { @Override protected void serializeField(ByteStream.Output out, Object obj, ObjectInspector objInspector, - SerDeParameters serdeParams) throws SerDeException { - if (!objInspector.getCategory().equals(Category.PRIMITIVE) || (objInspector.getTypeName().equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME))) { + SerDeParameters serdeParams, int index) throws SerDeException { + if (!objInspector.getCategory().equals(Category.PRIMITIVE) || + (objInspector.getTypeName().equalsIgnoreCase(serdeConstants.BINARY_TYPE_NAME))) { //do this for all complex types and binary - try { - serialize(out, SerDeUtils.getJSONString(obj, objInspector, serdeParams.getNullSequence().toString()), - PrimitiveObjectInspectorFactory.javaStringObjectInspector, serdeParams.getSeparators(), - 1, serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar(), - serdeParams.getNeedsEscape()); - - } catch (IOException e) { - throw new SerDeException(e); - } - - } else { - //primitives except binary - super.serializeField(out, obj, objInspector, serdeParams); + obj = SerDeUtils.getJSONString(obj, objInspector, nullStr); + objInspector = PrimitiveObjectInspectorFactory.javaStringObjectInspector; } + super.serializeField(out, obj, objInspector, serdeParams, index); } } diff --git 
serde/src/java/org/apache/hadoop/hive/serde2/FieldRewritable.java serde/src/java/org/apache/hadoop/hive/serde2/FieldRewritable.java new file mode 100644 index 0000000..4d482f6 --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/FieldRewritable.java @@ -0,0 +1,27 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2; + +/** + * marker interface for serde supporting field rewriting. see LazySimpleSerDe + * serdeConstants.COLUMN_ENCODE_INDICES : column indices to be rewritten + * serdeConstants.COLUMN_ENCODE_CLASSNAME : column rewriter implementing FieldRewriter + */ +public interface FieldRewritable { +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/FieldRewriter.java serde/src/java/org/apache/hadoop/hive/serde2/FieldRewriter.java new file mode 100644 index 0000000..e75bf6b --- /dev/null +++ serde/src/java/org/apache/hadoop/hive/serde2/FieldRewriter.java @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.serde2; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; + +import java.io.IOException; +import java.util.List; +import java.util.Properties; + +/** + * Encode/decode binary format of column + * + * Currently, it's only supported by LazySimpleSerDe and HBaseSerDe, which are + * marked by the {@link FieldRewritable} interface. + */ +public interface FieldRewriter { + + void init(List columnNames, List columnTypes, Properties properties) + throws IOException; + + void encode(int index, ByteStream.Input input, ByteStream.Output output) throws IOException; + + void decode(int index, ByteStream.Input input, ByteStream.Output output) throws IOException; + +} diff --git serde/src/java/org/apache/hadoop/hive/serde2/NullStructSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/NullStructSerDe.java index dba5e33..e030a4f 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/NullStructSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/NullStructSerDe.java @@ -46,6 +46,11 @@ public ObjectInspector getFieldObjectInspector() { } @Override + public int getFieldID() { + return 0; + } + + @Override public String getFieldComment() { return ""; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java index 157600e..939a3a0 100644 ---
serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java +++ serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStruct.java @@ -22,7 +22,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; import org.apache.hadoop.hive.serde2.lazy.LazyUtils; @@ -61,11 +60,10 @@ public ColumnarStruct(ObjectInspector oi, List notSkippedColumnIDs, Tex } @Override - protected int getLength(ObjectInspector objectInspector, ByteArrayRef cachedByteArrayRef, + protected int getLength(ObjectInspector objectInspector, byte[] bytes, int start, int fieldLen) { if (fieldLen == lengthNullSequence) { - byte[] data = cachedByteArrayRef.getData(); - if (LazyUtils.compare(data, start, fieldLen, + if (LazyUtils.compare(bytes, start, fieldLen, nullSequence.getBytes(), 0, lengthNullSequence) == 0) { return -1; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java index 1fd6853..708325d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java +++ serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarStructBase.java @@ -23,7 +23,6 @@ import java.util.List; import org.apache.hadoop.hive.serde2.SerDeStatsStruct; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -37,7 +36,6 @@ * use an array instead of only one object in case in future hive does not do * the byte copy. 
*/ - ByteArrayRef cachedByteArrayRef; BytesRefWritable rawBytesField; boolean inited; boolean fieldSkipped; @@ -45,7 +43,6 @@ public FieldInfo(LazyObjectBase lazyObject, boolean fieldSkipped, ObjectInspector oi) { field = lazyObject; - cachedByteArrayRef = new ByteArrayRef(); objectInspector = oi; if (fieldSkipped) { this.fieldSkipped = true; @@ -94,23 +91,24 @@ protected Object uncheckedGetField() { if (fieldSkipped) { return null; } + byte[] data; + try { + data = rawBytesField.getData(); + } catch (IOException e) { + throw new RuntimeException(e); + } if (!inited) { - try { - cachedByteArrayRef.setData(rawBytesField.getData()); - } catch (IOException e) { - throw new RuntimeException(e); - } inited = true; - int byteLength = getLength(objectInspector, cachedByteArrayRef, rawBytesField.getStart(), + int byteLength = getLength(objectInspector, data, rawBytesField.getStart(), rawBytesField.getLength()); if (byteLength == -1) { return null; } - field.init(cachedByteArrayRef, rawBytesField.getStart(), byteLength); + field.init(data, rawBytesField.getStart(), byteLength); return field.getObject(); } else { - if (getLength(objectInspector, cachedByteArrayRef, rawBytesField.getStart(), + if (getLength(objectInspector, data, rawBytesField.getStart(), rawBytesField.getLength()) == -1) { return null; } @@ -174,8 +172,9 @@ public Object getField(int fieldID) { /** * Check if the object is null and return the length of the stream * + * * @param objectInspector - * @param cachedByteArrayRef + * @param bytes * the bytes of the object * @param start * the start offset @@ -185,7 +184,7 @@ public Object getField(int fieldID) { * @return -1 for null, >=0 for length */ protected abstract int getLength(ObjectInspector objectInspector, - ByteArrayRef cachedByteArrayRef, int start, int length); + byte[] bytes, int start, int length); /** * create the lazy object for this field diff --git serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java 
serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java index 6d9715a..540b548 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java +++ serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarStruct.java @@ -20,7 +20,6 @@ import java.util.List; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -35,7 +34,7 @@ public LazyBinaryColumnarStruct(ObjectInspector oi, List notSkippedColu } @Override - protected int getLength(ObjectInspector objectInspector, ByteArrayRef cachedByteArrayRef, + protected int getLength(ObjectInspector objectInspector, byte[] bytes, int start, int length) { if (length == 0) { return -1; @@ -45,7 +44,7 @@ protected int getLength(ObjectInspector objectInspector, ByteArrayRef cachedByte PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) objectInspector) .getPrimitiveCategory(); if (primitiveCategory.equals(PrimitiveCategory.STRING) && (length == 1) && - (cachedByteArrayRef.getData()[start] + (bytes[start] == LazyBinaryColumnarSerDe.INVALID_UTF__SINGLE_BYTE[0])) { return 0; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java index ce0cfb3..f137c31 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyArray.java @@ -75,10 +75,10 @@ protected LazyArray(LazyListObjectInspector oi) { /** * Set the row data for this LazyArray. 
* - * @see LazyObject#init(ByteArrayRef, int, int) + * @see LazyObjectBase#init(byte[], int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { super.init(bytes, start, length); parsed = false; cachedList = null; @@ -117,8 +117,6 @@ private void parse() { return; } - byte[] bytes = this.bytes.getData(); - arrayLength = 0; int arrayByteEnd = start + length; int elementByteBegin = start; @@ -185,7 +183,7 @@ private Object uncheckedGetElement(int index) { int elementLength = startPosition[index + 1] - startPosition[index] - 1; if (elementLength == nullSequence.getLength() && 0 == LazyUtils - .compare(bytes.getData(), startPosition[index], elementLength, + .compare(bytes, startPosition[index], elementLength, nullSequence.getBytes(), 0, nullSequence.getLength())) { return arrayElements[index] = null; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java index ae12f20..353d36b 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java @@ -18,14 +18,11 @@ package org.apache.hadoop.hive.serde2.lazy; -import java.nio.charset.CharacterCodingException; - import org.apache.commons.codec.binary.Base64; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector; import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; public class LazyBinary extends LazyPrimitive { @@ -45,10 +42,10 @@ public LazyBinary(LazyBinary other){ } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { byte[] recv = new byte[length]; - System.arraycopy(bytes.getData(), start, recv, 0, length); + System.arraycopy(bytes, start, 
recv, 0, length); boolean arrayByteBase64 = Base64.isArrayByteBase64(recv); if (arrayByteBase64) { LOG.debug("Data not contains valid characters within the Base64 alphabet so " + diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBoolean.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBoolean.java index 42cb43c..c7e4538 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBoolean.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBoolean.java @@ -47,25 +47,25 @@ public LazyBoolean(LazyBoolean copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { - if (length == 4 && Character.toUpperCase(bytes.getData()[start]) == 'T' - && Character.toUpperCase(bytes.getData()[start + 1]) == 'R' - && Character.toUpperCase(bytes.getData()[start + 2]) == 'U' - && Character.toUpperCase(bytes.getData()[start + 3]) == 'E') { + public void init(byte[] bytes, int start, int length) { + if (length == 4 && Character.toUpperCase(bytes[start]) == 'T' + && Character.toUpperCase(bytes[start + 1]) == 'R' + && Character.toUpperCase(bytes[start + 2]) == 'U' + && Character.toUpperCase(bytes[start + 3]) == 'E') { data.set(true); isNull = false; } else if (length == 5 - && Character.toUpperCase(bytes.getData()[start]) == 'F' - && Character.toUpperCase(bytes.getData()[start + 1]) == 'A' - && Character.toUpperCase(bytes.getData()[start + 2]) == 'L' - && Character.toUpperCase(bytes.getData()[start + 3]) == 'S' - && Character.toUpperCase(bytes.getData()[start + 4]) == 'E') { + && Character.toUpperCase(bytes[start]) == 'F' + && Character.toUpperCase(bytes[start + 1]) == 'A' + && Character.toUpperCase(bytes[start + 2]) == 'L' + && Character.toUpperCase(bytes[start + 3]) == 'S' + && Character.toUpperCase(bytes[start + 4]) == 'E') { data.set(false); isNull = false; } else { if (oi.isExtendedLiteral()) { if (length == 1) { - byte c = bytes.getData()[start]; + byte c = bytes[start]; if (c == '1' || c == 't' || c == 'T') { 
data.set(true); isNull = false; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java index a3b8f76..5fefaae 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyByte.java @@ -47,9 +47,9 @@ public LazyByte(LazyByte copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { try { - data.set(parseByte(bytes.getData(), start, length, 10)); + data.set(parseByte(bytes, start, length, 10)); isNull = false; } catch (NumberFormatException e) { isNull = true; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDate.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDate.java index 7af2374..fc06802 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDate.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDate.java @@ -57,10 +57,10 @@ public LazyDate(LazyDate copy) { * @param length */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { String s = null; try { - s = Text.decode(bytes.getData(), start, length); + s = Text.decode(bytes, start, length); data.set(Date.valueOf(s)); isNull = false; } catch (Exception e) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java index 05ca4e9..5e0928b 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyDouble.java @@ -44,10 +44,10 @@ public LazyDouble(LazyDouble copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { String byteData = null; try { - byteData = Text.decode(bytes.getData(), start, length); + byteData = 
Text.decode(bytes, start, length); data.set(Double.parseDouble(byteData)); isNull = false; } catch (NumberFormatException e) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFloat.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFloat.java index 37676d1..fc644c7 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFloat.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFloat.java @@ -44,10 +44,10 @@ public LazyFloat(LazyFloat copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { String byteData = null; try { - byteData = Text.decode(bytes.getData(), start, length); + byteData = Text.decode(bytes, start, length); data.set(Float.parseFloat(byteData)); isNull = false; } catch (NumberFormatException e) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java index ef469eb..b21a545 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveChar.java @@ -54,10 +54,10 @@ public void setValue(LazyHiveChar copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { String byteData = null; try { - byteData = Text.decode(bytes.getData(), start, length); + byteData = Text.decode(bytes, start, length); data.set(byteData, maxLength); isNull = false; } catch (CharacterCodingException e) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveDecimal.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveDecimal.java index 78cc381..83d13d0 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveDecimal.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveDecimal.java @@ -62,10 +62,10 @@ public LazyHiveDecimal(LazyHiveDecimal copy) { * @param 
length */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { String byteData = null; try { - byteData = Text.decode(bytes.getData(), start, length); + byteData = Text.decode(bytes, start, length); } catch (CharacterCodingException e) { isNull = true; LOG.debug("Data not in the HiveDecimal data type range so converted to null.", e); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java index bc8d41e..2a3a0ec 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyHiveVarchar.java @@ -54,10 +54,10 @@ public void setValue(LazyHiveVarchar copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { String byteData = null; try { - byteData = Text.decode(bytes.getData(), start, length); + byteData = Text.decode(bytes, start, length); data.set(byteData, maxLength); isNull = false; } catch (CharacterCodingException e) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java index ad82ebf..bfb79ad 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyInteger.java @@ -50,9 +50,9 @@ public LazyInteger(LazyInteger copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { try { - data.set(parseInt(bytes.getData(), start, length, 10)); + data.set(parseInt(bytes, start, length, 10)); isNull = false; } catch (NumberFormatException e) { isNull = true; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java 
index a9779a0..3cc2163 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyLong.java @@ -50,9 +50,9 @@ public LazyLong(LazyLong copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { try { - data.set(parseLong(bytes.getData(), start, length, 10)); + data.set(parseLong(bytes, start, length, 10)); isNull = false; } catch (NumberFormatException e) { isNull = true; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java index 686fc76..824b7b3 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java @@ -93,10 +93,10 @@ protected LazyMap(LazyMapObjectInspector oi) { /** * Set the row data for this LazyArray. * - * @see LazyObject#init(ByteArrayRef, int, int) + * @see LazyObjectBase#init(byte[], int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { super.init(bytes, start, length); parsed = false; cachedMap = null; @@ -150,7 +150,6 @@ private void parse() { int elementByteBegin = start; int keyValueSeparatorPosition = -1; int elementByteEnd = start; - byte[] bytes = this.bytes.getData(); Set keySet = new HashSet(); // Go through all bytes in the byte[] @@ -265,7 +264,7 @@ private LazyObject uncheckedGetValue(int index) { int valueILength = valueLength[index]; if (valueILength < 0 || ((valueILength == nullSequence.getLength()) && 0 == LazyUtils - .compare(bytes.getData(), valueIBegin, valueILength, nullSequence + .compare(bytes, valueIBegin, valueILength, nullSequence .getBytes(), 0, nullSequence.getLength()))) { return valueObjects[index] = null; } @@ -292,7 +291,7 @@ private LazyObject uncheckedGetValue(int index) { int keyILength = keyEnd[index] - 
keyStart[index]; if (keyILength < 0 || ((keyILength == nullSequence.getLength()) && 0 == LazyUtils.compare( - bytes.getData(), keyIBegin, keyILength, nullSequence.getBytes(), 0, + bytes, keyIBegin, keyILength, nullSequence.getBytes(), 0, nullSequence.getLength()))) { return keyObjects[index] = null; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java index e4cffc9..2c18688 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyNonPrimitive.java @@ -25,7 +25,7 @@ public abstract class LazyNonPrimitive extends LazyObject { - protected ByteArrayRef bytes; + protected byte[] bytes; protected int start; protected int length; @@ -48,7 +48,7 @@ protected LazyNonPrimitive(OI oi) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { if (bytes == null) { throw new RuntimeException("bytes cannot be null!"); } @@ -56,7 +56,7 @@ public void init(ByteArrayRef bytes, int start, int length) { this.start = start; this.length = length; assert start >= 0; - assert start + length <= bytes.getData().length; + assert start + length <= bytes.length; } @Override @@ -66,6 +66,6 @@ public Object getObject() { @Override public int hashCode() { - return LazyUtils.hashBytes(bytes.getData(), start, length); + return LazyUtils.hashBytes(bytes, start, length); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java index 10f4c05..1ccf9b0 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObject.java @@ -40,9 +40,6 @@ protected LazyObject(OI oi) { this.oi = oi; } - @Override - public abstract int hashCode(); - protected OI getInspector() { return oi; } diff 
--git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java index 3334dff..ae12085 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyObjectBase.java @@ -25,15 +25,15 @@ * that we will be able to drop the reference to byte[] by a single * assignment. The ByteArrayRef object can be reused across multiple rows. * + * * @param bytes * The wrapper of the byte[]. * @param start * The start position inside the bytes. * @param length * The length of the data, starting from "start" - * @see ByteArrayRef */ - public abstract void init(ByteArrayRef bytes, int start, int length); + public abstract void init(byte[] bytes, int start, int length); /** * If the LazyObjectBase is a primitive Object, then deserialize it and return the @@ -41,4 +41,6 @@ */ public abstract Object getObject(); + + public abstract int hashCode(); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java index 7d23c46..ef8f4a2 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyPrimitive.java @@ -84,10 +84,10 @@ public boolean equals(Object obj) { return data.equals(((LazyPrimitive) obj).getWritableObject()); } - public void logExceptionMessage(ByteArrayRef bytes, int start, int length, String dataType) { + public void logExceptionMessage(byte[] bytes, int start, int length, String dataType) { try { if(LOG.isDebugEnabled()) { - String byteData = Text.decode(bytes.getData(), start, length); + String byteData = Text.decode(bytes, start, length); LOG.debug("Data not in the " + dataType + " data type range so converted to null. 
Given data is :" + byteData, new Exception("For debugging purposes")); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java index f04e131..6237994 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyShort.java @@ -47,9 +47,9 @@ public LazyShort(LazyShort copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { try { - data.set(parseShort(bytes.getData(), start, length)); + data.set(parseShort(bytes, start, length)); isNull = false; } catch (NumberFormatException e) { isNull = true; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java index 82c1263..4a5e5c3 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java @@ -23,6 +23,7 @@ import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.TreeSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -31,6 +32,8 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.ByteStream; +import org.apache.hadoop.hive.serde2.FieldRewritable; +import org.apache.hadoop.hive.serde2.FieldRewriter; import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; @@ -50,6 +53,7 @@ import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; +import org.apache.hadoop.util.ReflectionUtils; /** * LazySimpleSerDe can be used to read the same data format as @@ -61,7 +65,7 @@ * Also LazySimpleSerDe outputs 
typed columns instead of treating all columns as * String like MetadataTypedColumnsetSerDe. */ -public class LazySimpleSerDe extends AbstractSerDe { +public class LazySimpleSerDe extends AbstractSerDe implements FieldRewritable { public static final Log LOG = LogFactory.getLog(LazySimpleSerDe.class .getName()); @@ -129,6 +133,11 @@ public static byte getByte(String altValue, byte defaultVal) { boolean escaped; byte escapeChar; boolean[] needsEscape; + boolean[] needEncoding; + + public FieldRewriter rewriter; + public transient final ByteStream.Input input = new ByteStream.Input(); + public transient final ByteStream.Output output = new ByteStream.Output(); boolean extendedBooleanLiteral; @@ -171,9 +180,26 @@ public byte getEscapeChar() { public boolean[] getNeedsEscape() { return needsEscape; } + + public boolean isEncoded(int index) { + return needEncoding != null && index >= 0 && needEncoding[index]; + } + + public void encode(int index, ByteStream.Output out, int pos) throws IOException { + input.reset(out.getData(), pos, out.getCount() - pos); + output.reset(); + rewriter.encode(index, input, output); + out.writeTo(pos, output); + } + + public void decode(int index, byte[] bytes, int start, int length) throws IOException { + input.reset(bytes, start, length); + output.reset(); + rewriter.decode(index, input, output); + } } - SerDeParameters serdeParams = null; + protected SerDeParameters serdeParams = null; /** * Initialize the SerDe given the parameters. 
serialization.format: separator @@ -200,6 +226,8 @@ public void initialize(Configuration job, Properties tbl) cachedLazyStruct = (LazyStruct) LazyFactory .createLazyObject(cachedObjectInspector); + cachedLazyStruct.setSerdeParams(serdeParams); + LOG.debug(getClass().getName() + " initialized with: columnNames=" + serdeParams.columnNames + " columnTypes=" + serdeParams.columnTypes + " separator=" + Arrays.asList(serdeParams.separators) @@ -310,17 +338,65 @@ public static SerDeParameters initSerdeParams(Configuration job, } } - serdeParams.extendedBooleanLiteral = job == null ? false : + serdeParams.extendedBooleanLiteral = job != null && job.getBoolean(ConfVars.HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL.varname, false); + + String encodeIndices = tbl.getProperty(serdeConstants.COLUMN_ENCODE_INDICES); + if (encodeIndices != null) { + TreeSet indices = new TreeSet(); + for (String index : encodeIndices.split(",")) { + indices.add(Integer.parseInt(index.trim())); + } + serdeParams.needEncoding = new boolean[serdeParams.columnNames.size()]; + for (int index : indices) { + serdeParams.needEncoding[index] = true; + } + } + String encodeColumns = tbl.getProperty(serdeConstants.COLUMN_ENCODE_COLUMNS); + if (encodeColumns != null) { + if (serdeParams.needEncoding == null) { + serdeParams.needEncoding = new boolean[serdeParams.columnNames.size()]; + } + for (String column : encodeColumns.split(",")) { + serdeParams.needEncoding[findIndex(serdeParams.columnNames, column.trim())] = true; + } + } + + String encoderClass = tbl.getProperty(serdeConstants.COLUMN_ENCODE_CLASSNAME); + if (encoderClass != null) { + serdeParams.rewriter = createRewriter(encoderClass, tbl, serdeParams, job); + } + if (serdeParams.needEncoding != null && serdeParams.rewriter == null) { + throw new SerDeException("Encoder is not specified by serde property 'column.encode.classname'"); + } + return serdeParams; } + private static int findIndex(List columnNames, String column) { + for (int i = 0; i < 
columnNames.size(); i++) { + if (columnNames.get(i).equals(column)) { + return i; + } + } + throw new IllegalArgumentException("Invalid column name " + column + " in " + columnNames); + } + + private static FieldRewriter createRewriter(String encoderClass, Properties properties, + SerDeParameters parameters, Configuration job) throws SerDeException { + try { + FieldRewriter rewriter = + (FieldRewriter) ReflectionUtils.newInstance(Class.forName(encoderClass), job); + rewriter.init(parameters.columnNames, parameters.columnTypes, properties); + return rewriter; + } catch (Exception e) { + throw new SerDeException(e); + } + } + // The object for storing row data LazyStruct cachedLazyStruct; - // The wrapper for byte array - ByteArrayRef byteArrayRef; - /** * Deserialize a row from the Writable to a LazyObject. * @@ -331,18 +407,13 @@ public static SerDeParameters initSerdeParams(Configuration job, */ @Override public Object deserialize(Writable field) throws SerDeException { - if (byteArrayRef == null) { - byteArrayRef = new ByteArrayRef(); - } if (field instanceof BytesWritable) { BytesWritable b = (BytesWritable) field; // For backward-compatibility with hadoop 0.17 - byteArrayRef.setData(b.getBytes()); - cachedLazyStruct.init(byteArrayRef, 0, b.getLength()); + cachedLazyStruct.init(b.getBytes(), 0, b.getLength()); } else if (field instanceof Text) { Text t = (Text) field; - byteArrayRef.setData(t.getBytes()); - cachedLazyStruct.init(byteArrayRef, 0, t.getLength()); + cachedLazyStruct.init(t.getBytes(), 0, t.getLength()); } else { throw new SerDeException(getClass().toString() + ": expects either BytesWritable or Text object!"); @@ -424,7 +495,7 @@ public Writable serialize(Object obj, ObjectInspector objInspector) + TypeInfoUtils.getTypeInfoFromObjectInspector(objInspector)); } - serializeField(serializeStream, f, foi, serdeParams); + serializeField(serializeStream, f, foi, serdeParams, i); } // TODO: The copy of data is unnecessary, but there is no work-around @@ 
-438,10 +509,14 @@ public Writable serialize(Object obj, ObjectInspector objInspector) } protected void serializeField(ByteStream.Output out, Object obj, ObjectInspector objInspector, - SerDeParameters serdeParams) throws SerDeException { + SerDeParameters serdeParams, int index) throws SerDeException { try { + int pos = out.getCount(); serialize(out, obj, objInspector, serdeParams.separators, 1, serdeParams.nullSequence, serdeParams.escaped, serdeParams.escapeChar, serdeParams.needsEscape); + if (serdeParams.isEncoded(index)) { + serdeParams.encode(index, out, pos); + } } catch (IOException e) { throw new SerDeException(e); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java index 28b3f86..067f095 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyString.java @@ -36,15 +36,14 @@ public LazyString(LazyString copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { if (oi.isEscaped()) { byte escapeChar = oi.getEscapeChar(); - byte[] inputBytes = bytes.getData(); // First calculate the length of the output string int outputLength = 0; for (int i = 0; i < length; i++) { - if (inputBytes[start + i] != escapeChar) { + if (bytes[start + i] != escapeChar) { outputLength++; } else { outputLength++; @@ -54,7 +53,7 @@ public void init(ByteArrayRef bytes, int start, int length) { // Copy the data over, so that the internal state of Text will be set to // the required outputLength. 
- data.set(bytes.getData(), start, outputLength); + data.set(bytes, start, outputLength); // We need to copy the data byte by byte only in case the // "outputLength < length" (which means there is at least one escaped @@ -63,20 +62,20 @@ public void init(ByteArrayRef bytes, int start, int length) { int k = 0; byte[] outputBytes = data.getBytes(); for (int i = 0; i < length; i++) { - byte b = inputBytes[start + i]; + byte b = bytes[start + i]; if (b != escapeChar || i == length - 1) { outputBytes[k++] = b; } else { // get the next byte i++; - outputBytes[k++] = inputBytes[start + i]; + outputBytes[k++] = bytes[start + i]; } } assert (k == outputLength); } } else { // if the data is not escaped, simply copy the data. - data.set(bytes.getData(), start, length); + data.set(bytes, start, length); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java index 8a1ea46..26a71b5 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.serde2.lazy; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -26,7 +27,6 @@ import org.apache.hadoop.hive.serde2.SerDeStatsStruct; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.io.Text; /** @@ -68,6 +68,9 @@ */ boolean[] fieldInited; + // non-null only for top level object (this struct is a row) + protected LazySimpleSerDe.SerDeParameters serdeParams; + /** * Construct a LazyStruct object with the ObjectInspector. */ @@ -78,10 +81,10 @@ public LazyStruct(LazySimpleStructObjectInspector oi) { /** * Set the row data for this LazyStruct. 
* - * @see LazyObject#init(ByteArrayRef, int, int) + * @see LazyObjectBase#init(byte[], int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { super.init(bytes, start, length); parsed = false; serializedSize = length; @@ -101,8 +104,7 @@ private void parse() { byte escapeChar = oi.getEscapeChar(); if (fields == null) { - List fieldRefs = ((StructObjectInspector) oi) - .getAllStructFieldRefs(); + List fieldRefs = oi.getAllStructFieldRefs(); fields = new LazyObject[fieldRefs.size()]; for (int i = 0; i < fields.length; i++) { fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i) @@ -118,7 +120,6 @@ private void parse() { int fieldId = 0; int fieldByteBegin = start; int fieldByteEnd = start; - byte[] bytes = this.bytes.getData(); // Go through all bytes in the byte[] while (fieldByteEnd <= structByteEnd) { @@ -210,13 +211,24 @@ private Object uncheckedGetField(int fieldID) { int fieldLength = startPosition[fieldID + 1] - startPosition[fieldID] - 1; if ((fieldLength < 0) || (fieldLength == nullSequence.getLength() && LazyUtils.compare(bytes - .getData(), fieldByteBegin, fieldLength, nullSequence.getBytes(), + , fieldByteBegin, fieldLength, nullSequence.getBytes(), 0, nullSequence.getLength()) == 0)) { return null; } + byte[] binary = bytes; if (!fieldInited[fieldID]) { fieldInited[fieldID] = true; - fields[fieldID].init(bytes, fieldByteBegin, fieldLength); + if (serdeParams != null && serdeParams.isEncoded(fieldID)) { + try { + serdeParams.decode(fieldID, binary, fieldByteBegin, fieldLength); + } catch (IOException e) { + throw new RuntimeException(e); + } + binary = serdeParams.output.getData(); + fieldLength = serdeParams.output.getCount(); + fieldByteBegin = 0; + } + fields[fieldID].init(binary, fieldByteBegin, fieldLength); } return fields[fieldID].getObject(); } @@ -275,4 +287,8 @@ protected void setFieldInited(boolean[] fieldInited) { public long 
getRawDataSerializedSize() { return serializedSize; } + + public void setSerdeParams(LazySimpleSerDe.SerDeParameters serdeParams) { + this.serdeParams = serdeParams; + } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java index 27895c5..102c020 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyTimestamp.java @@ -57,10 +57,10 @@ public LazyTimestamp(LazyTimestamp copy) { * @param length */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { String s = null; try { - s = new String(bytes.getData(), start, length, "US-ASCII"); + s = new String(bytes, start, length, "US-ASCII"); } catch (UnsupportedEncodingException e) { LOG.error(e); s = ""; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java index 9f6bc3f..a50859f 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java @@ -63,10 +63,10 @@ public LazyUnion(LazyUnionObjectInspector oi) { /** * Set the row data for this LazyUnion. 
* - * @see LazyObject#init(ByteArrayRef, int, int) + * @see LazyObjectBase#init(byte[], int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { super.init(bytes, start, length); parsed = false; } @@ -87,7 +87,6 @@ private void parse() { int unionByteEnd = start + length; int fieldByteEnd = start; - byte[] bytes = this.bytes.getData(); // Go through all bytes in the byte[] while (fieldByteEnd < unionByteEnd) { if (bytes[fieldByteEnd] != separator) { @@ -127,7 +126,7 @@ private Object uncheckedGetField() { Text nullSequence = oi.getNullSequence(); int fieldLength = start + length - startPosition; if (fieldLength != 0 && fieldLength == nullSequence.getLength() && - LazyUtils.compare(bytes.getData(), startPosition, fieldLength, + LazyUtils.compare(bytes, startPosition, fieldLength, nullSequence.getBytes(), 0, nullSequence.getLength()) == 0) { return null; } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyVoid.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyVoid.java index 13d1b11..cd7bd12 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyVoid.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyVoid.java @@ -31,6 +31,6 @@ } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java index 55f96ee..af6c8fe 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java @@ -21,8 +21,6 @@ import java.util.Arrays; import java.util.List; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; -import org.apache.hadoop.hive.serde2.lazy.LazyObject; import 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector; @@ -91,10 +89,10 @@ protected LazyBinaryArray(LazyBinaryListObjectInspector oi) { /** * Set the row data for this LazyBinaryArray. * - * @see LazyObject#init(ByteArrayRef, int, int) + * @see org.apache.hadoop.hive.serde2.lazy.LazyObjectBase#init(byte[], int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { super.init(bytes, start, length); parsed = false; } @@ -122,8 +120,6 @@ private void adjustArraySize(int newSize) { */ private void parse() { - byte[] bytes = this.bytes.getData(); - // get the vlong that represents the map size LazyBinaryUtils.readVInt(bytes, start, vInt); arraySize = vInt.value; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBinary.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBinary.java index d398285..6246d84 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBinary.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBinary.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.serde2.lazybinary; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector; import org.apache.hadoop.io.BytesWritable; @@ -38,8 +37,8 @@ public LazyBinaryBinary(WritableBinaryObjectInspector baoi) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { assert (length > -1); - data.set(bytes.getData(), start, length); + data.set(bytes, start, length); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java 
serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java index 96969fb..545d304 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hive.serde2.lazybinary; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; import org.apache.hadoop.io.BooleanWritable; @@ -48,9 +47,9 @@ public LazyBinaryBoolean(LazyBinaryBoolean copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { assert (1 == length); - byte val = bytes.getData()[start]; + byte val = bytes[start]; if (val == 0) { data.set(false); } else if (val == 1) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java index d430c96..b6b377f 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.serde2.lazybinary; import org.apache.hadoop.hive.serde2.io.ByteWritable; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; /** @@ -38,8 +37,8 @@ } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { assert (1 == length); - data.set(bytes.getData()[start]); + data.set(bytes[start]); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDate.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDate.java index d0c2504..889beb2 100644 --- 
serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDate.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDate.java @@ -20,7 +20,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde2.io.DateWritable; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDateObjectInspector; @@ -54,7 +53,7 @@ * @param length */ @Override - public void init(ByteArrayRef bytes, int start, int length) { - data.setFromBytes(bytes.getData(), start, length, vInt); + public void init(byte[] bytes, int start, int length) { + data.setFromBytes(bytes, start, length, vInt); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java index c2c4f9e..b9889a5 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.serde2.lazybinary; import org.apache.hadoop.hive.serde2.io.DoubleWritable; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; /** @@ -38,9 +37,8 @@ } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { assert (8 == length); - data.set(Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes - .getData(), start))); + data.set(Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes, start))); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java index 11e13dc..526e8ea 
100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hive.serde2.lazybinary; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; import org.apache.hadoop.io.FloatWritable; @@ -38,9 +37,8 @@ } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { assert (4 == length); - data.set(Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes - .getData(), start))); + data.set(Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes, start))); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveChar.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveChar.java index b64a3b4..52b71c4 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveChar.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveChar.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.serde2.lazybinary; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; import org.apache.hadoop.io.Text; @@ -41,10 +40,10 @@ } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { // re-use existing text member in char writable Text textValue = data.getTextValue(); - textValue.set(bytes.getData(), start, length); + textValue.set(bytes, start, length); data.enforceMaxLength(maxLength); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveDecimal.java 
serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveDecimal.java index e56e2ca..7525f80 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveDecimal.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveDecimal.java @@ -19,7 +19,6 @@ import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; @@ -43,8 +42,8 @@ } @Override - public void init(ByteArrayRef bytes, int start, int length) { - data.setFromBytes(bytes.getData(), start, length); + public void init(byte[] bytes, int start, int length) { + data.setFromBytes(bytes, start, length); HiveDecimal dec = data.getHiveDecimal(precision, scale); data = dec == null ? null : new HiveDecimalWritable(dec); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveVarchar.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveVarchar.java index 26df6f3..f96e49e 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveVarchar.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryHiveVarchar.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.serde2.lazybinary; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.Text; @@ -41,10 +40,10 @@ } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { // re-use existing text member in varchar writable Text textValue = 
data.getTextValue(); - textValue.set(bytes.getData(), start, length); + textValue.set(bytes, start, length); data.enforceMaxLength(maxLength); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java index f2a6943..7452103 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hive.serde2.lazybinary; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; import org.apache.hadoop.io.IntWritable; @@ -46,8 +45,8 @@ VInt vInt = new LazyBinaryUtils.VInt(); @Override - public void init(ByteArrayRef bytes, int start, int length) { - LazyBinaryUtils.readVInt(bytes.getData(), start, vInt); + public void init(byte[] bytes, int start, int length) { + LazyBinaryUtils.readVInt(bytes, start, vInt); assert (length == vInt.length); data.set(vInt.value); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java index ea0eba2..073e6d4 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hive.serde2.lazybinary; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VLong; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; import org.apache.hadoop.io.LongWritable; @@ -46,8 +45,8 @@ VLong vLong = new LazyBinaryUtils.VLong(); @Override - public void init(ByteArrayRef bytes, int start, 
int length) { - LazyBinaryUtils.readVLong(bytes.getData(), start, vLong); + public void init(byte[] bytes, int start, int length) { + LazyBinaryUtils.readVLong(bytes, start, vLong); assert (length == vLong.length); data.set(vLong.value); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java index 31ad78e..a866197 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java @@ -23,7 +23,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector; @@ -100,10 +99,10 @@ protected LazyBinaryMap(LazyBinaryMapObjectInspector oi) { /** * Set the row data for this LazyBinaryMap. 
* - * @see LazyBinaryObject#init(ByteArrayRef, int, int) + * @see org.apache.hadoop.hive.serde2.lazy.LazyObjectBase#init(byte[], int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { super.init(bytes, start, length); parsed = false; } @@ -137,8 +136,6 @@ protected void adjustArraySize(int newSize) { */ private void parse() { - byte[] bytes = this.bytes.getData(); - // get the VInt that represents the map size LazyBinaryUtils.readVInt(bytes, start, vInt); mapSize = vInt.value; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java index 1d0783f..aa50c86 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hive.serde2.lazybinary; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -29,7 +28,7 @@ public abstract class LazyBinaryNonPrimitive extends LazyBinaryObject { - protected ByteArrayRef bytes; + protected byte[] bytes; protected int start; protected int length; @@ -46,7 +45,7 @@ public Object getObject() { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { if (null == bytes) { throw new RuntimeException("bytes cannot be null!"); } @@ -60,6 +59,6 @@ public void init(ByteArrayRef bytes, int start, int length) { @Override public int hashCode() { - return LazyUtils.hashBytes(bytes.getData(), start, length); + return LazyUtils.hashBytes(bytes, start, length); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java 
serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java index b188c3f..e4c1d53 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java @@ -27,8 +27,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.type.Decimal128; -import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.ByteStream; @@ -38,12 +36,10 @@ import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -150,32 +146,24 @@ public ObjectInspector getObjectInspector() throws SerDeException { return BytesWritable.class; } - // The wrapper for byte array - ByteArrayRef byteArrayRef; - /** * Deserialize a table record to a lazybinary struct. 
*/ @Override public Object deserialize(Writable field) throws SerDeException { - if (byteArrayRef == null) { - byteArrayRef = new ByteArrayRef(); - } if (field instanceof BinaryComparable) { BinaryComparable b = (BinaryComparable) field; if (b.getLength() == 0) { return null; } // For backward-compatibility with hadoop 0.17 - byteArrayRef.setData(b.getBytes()); - cachedLazyBinaryStruct.init(byteArrayRef, 0, b.getLength()); + cachedLazyBinaryStruct.init(b.getBytes(), 0, b.getLength()); } else if (field instanceof Text) { Text t = (Text) field; if (t.getLength() == 0) { return null; } - byteArrayRef.setData(t.getBytes()); - cachedLazyBinaryStruct.init(byteArrayRef, 0, t.getLength()); + cachedLazyBinaryStruct.init(t.getBytes(), 0, t.getLength()); } else { throw new SerDeException(getClass().toString() + ": expects either BinaryComparable or Text object!"); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java index 1bc01ce..b5d58a6 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hive.serde2.lazybinary; import org.apache.hadoop.hive.serde2.io.ShortWritable; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; /** @@ -38,8 +37,8 @@ } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { assert (2 == length); - data.set(LazyBinaryUtils.byteArrayToShort(bytes.getData(), start)); + data.set(LazyBinaryUtils.byteArrayToShort(bytes, start)); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java index 9f691d7..4d2292a 
100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hive.serde2.lazybinary; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; import org.apache.hadoop.io.Text; @@ -43,8 +42,8 @@ public LazyBinaryString(LazyBinaryString copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { assert (length > -1); - data.set(bytes.getData(), start, length); + data.set(bytes, start, length); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java index caf3517..36f1e90 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java @@ -24,7 +24,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde2.SerDeStatsStruct; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo; import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -88,7 +87,7 @@ protected LazyBinaryStruct(LazyBinaryStructObjectInspector oi) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { super.init(bytes, start, length); parsed = false; serializedSize = length; @@ -126,7 +125,6 @@ private void parse() { int fieldId = 0; int structByteEnd = start + length; - byte[] bytes = this.bytes.getData(); byte nullByte = 
bytes[start]; int lastFieldByteEnd = start + 1; diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryTimestamp.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryTimestamp.java index 98dd81c..0eeb842 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryTimestamp.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryTimestamp.java @@ -20,7 +20,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde2.io.TimestampWritable; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector; /** @@ -47,11 +46,10 @@ * @param bytes * @param start * @param length - * If length is 4, no decimal bytes follow, otherwise read following bytes - * as VInt and reverse its value + * If length is 4, no decimal bytes follow, otherwise read following bytes */ @Override - public void init(ByteArrayRef bytes, int start, int length) { - data.set(bytes.getData(), start); + public void init(byte[] bytes, int start, int length) { + data.set(bytes, start); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryVoid.java serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryVoid.java index 4b7153d..4103db6 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryVoid.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryVoid.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hive.serde2.lazybinary; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableVoidObjectInspector; import org.apache.hadoop.io.Writable; @@ -36,6 +35,6 @@ } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { } } diff --git 
serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioBoolean.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioBoolean.java index 64a237e..b3cde9d 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioBoolean.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioBoolean.java @@ -22,13 +22,12 @@ import java.io.IOException; import org.apache.hadoop.hive.serde2.ByteStream; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyBoolean; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBooleanObjectInspector; /** * LazyBooleanBinary for storing a boolean value as an BooleanWritable. This class complements class - * LazyBoolean. It's primary difference is the {@link #init(ByteArrayRef, int, int)} method, which + * LazyBoolean. It's primary difference is the {@link #init(byte[], int, int)} method, which * reads the boolean value stored from the default binary format. */ public class LazyDioBoolean extends LazyBoolean { @@ -52,12 +51,12 @@ public LazyDioBoolean(LazyDioBoolean copy) { * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { boolean value = false; try { - in = new ByteStream.Input(bytes.getData(), start, length); + in = new ByteStream.Input(bytes, start, length); din = new DataInputStream(in); value = din.readBoolean(); data.set(value); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioByte.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioByte.java index 8cdbfd8..c9e0dc5 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioByte.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioByte.java @@ -22,13 +22,12 @@ import java.io.IOException; import org.apache.hadoop.hive.serde2.ByteStream; -import 
org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyByte; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyByteObjectInspector; /** * LazyByteBinary for storing a byte value as a ByteWritable. This class complements class - * LazyByte. It's primary difference is the {@link #init(ByteArrayRef, int, int)} method, which + * LazyByte. It's primary difference is the {@link #init(byte[], int, int)} method, which * reads the raw byte value stored. */ public class LazyDioByte extends LazyByte { @@ -45,12 +44,12 @@ public LazyDioByte(LazyDioByte copy) { } @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { byte value = 0; try { - in = new ByteStream.Input(bytes.getData(), start, length); + in = new ByteStream.Input(bytes, start, length); din = new DataInputStream(in); value = din.readByte(); data.set(value); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioDouble.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioDouble.java index c86705d..c92f7db 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioDouble.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioDouble.java @@ -22,13 +22,12 @@ import java.io.IOException; import org.apache.hadoop.hive.serde2.ByteStream; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyDouble; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector; /** * LazyDoubleBinary for storing a double value as a DoubleWritable. This class complements class - * LazyDouble. It's primary difference is the {@link #init(ByteArrayRef, int, int)} method, which + * LazyDouble. It's primary difference is the {@link #init(byte[], int, int)} method, which * reads the double value stored from the default binary format. 
*/ public class LazyDioDouble extends LazyDouble { @@ -52,12 +51,12 @@ public LazyDioDouble(LazyDoubleObjectInspector oi) { * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { double value = 0.0; try { - in = new ByteStream.Input(bytes.getData(), start, length); + in = new ByteStream.Input(bytes, start, length); din = new DataInputStream(in); value = din.readDouble(); data.set(value); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioFloat.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioFloat.java index fc3c1b2..fa393d9 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioFloat.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioFloat.java @@ -22,13 +22,12 @@ import java.io.IOException; import org.apache.hadoop.hive.serde2.ByteStream; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyFloat; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyFloatObjectInspector; /** * LazyFloatBinary for storing a float value as a FloatWritable. This class complements class - * LazyFloat. It's primary difference is the {@link #init(ByteArrayRef, int, int)} method, which + * LazyFloat. It's primary difference is the {@link #init(byte[], int, int)} method, which * reads the float value stored from the default binary format. 
*/ public class LazyDioFloat extends LazyFloat { @@ -52,12 +51,12 @@ public LazyDioFloat(LazyDioFloat copy) { * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { float value = 0.0F; try { - in = new ByteStream.Input(bytes.getData(), start, length); + in = new ByteStream.Input(bytes, start, length); din = new DataInputStream(in); value = din.readFloat(); data.set(value); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioInteger.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioInteger.java index e7eca65..513ba66 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioInteger.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioInteger.java @@ -22,13 +22,12 @@ import java.io.IOException; import org.apache.hadoop.hive.serde2.ByteStream; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyInteger; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector; /** * LazyIntegerBinary for storing an int value as an IntWritable. This class complements class - * LazyInteger. It's primary difference is the {@link #init(ByteArrayRef, int, int)} method, which + * LazyInteger. It's primary difference is the {@link #init(byte[], int, int)} method, which * reads the integer value stored from the default binary format. 
*/ public class LazyDioInteger extends LazyInteger { @@ -52,12 +51,12 @@ public LazyDioInteger(LazyDioInteger copy) { * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { int value = 0; try { - in = new ByteStream.Input(bytes.getData(), start, length); + in = new ByteStream.Input(bytes, start, length); din = new DataInputStream(in); value = din.readInt(); data.set(value); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioLong.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioLong.java index 98c7058..3d03480 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioLong.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioLong.java @@ -22,13 +22,12 @@ import java.io.IOException; import org.apache.hadoop.hive.serde2.ByteStream; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyLong; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyLongObjectInspector; /** * LazyLongBinary for storing a long value as a LongWritable. This class complements class - * LazyLong. It's primary difference is the {@link #init(ByteArrayRef, int, int)} method, which + * LazyLong. It's primary difference is the {@link #init(byte[], int, int)} method, which * reads the long value stored in the default binary format. 
*/ public class LazyDioLong extends LazyLong { @@ -52,12 +51,12 @@ public LazyDioLong(LazyDioLong copy) { * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { long value = 0; try { - in = new ByteStream.Input(bytes.getData(), start, length); + in = new ByteStream.Input(bytes, start, length); din = new DataInputStream(in); value = din.readLong(); data.set(value); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioShort.java serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioShort.java index 137d0c8..b175766 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioShort.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazydio/LazyDioShort.java @@ -22,13 +22,12 @@ import java.io.IOException; import org.apache.hadoop.hive.serde2.ByteStream; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyShort; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyShortObjectInspector; /** * LazyShortBinary for storing a short value as a ShortWritable. This class complements class - * LazyShort. It's primary difference is the {@link #init(ByteArrayRef, int, int)} method, which + * LazyShort. It's primary difference is the {@link #init(byte[], int, int)} method, which * reads the short value stored from the default binary format. 
*/ public class LazyDioShort extends LazyShort { @@ -52,12 +51,12 @@ public LazyDioShort(LazyDioShort copy) { * (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef, int, int) */ @Override - public void init(ByteArrayRef bytes, int start, int length) { + public void init(byte[] bytes, int start, int length) { short value = 0; try { - in = new ByteStream.Input(bytes.getData(), start, length); + in = new ByteStream.Input(bytes, start, length); din = new DataInputStream(in); value = din.readShort(); data.set(value); diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedStructObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedStructObjectInspector.java index 5e1a369..ef66e97 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedStructObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/DelegatedStructObjectInspector.java @@ -56,6 +56,9 @@ public String getFieldName() { public ObjectInspector getFieldObjectInspector() { return field.getFieldObjectInspector(); } + public int getFieldID() { + return field.getFieldID(); + } public String getFieldComment() { return field.getFieldComment(); } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java index bd3cdd4..2e31759 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ReflectionStructObjectInspector.java @@ -42,6 +42,7 @@ * */ public static class MyField implements StructField { + protected int fieldID; protected Field field; protected ObjectInspector fieldObjectInspector; @@ -49,19 +50,28 @@ protected MyField() { super(); } - public MyField(Field field, ObjectInspector fieldObjectInspector) { + public MyField(int fieldID, Field 
field, ObjectInspector fieldObjectInspector) { + this.fieldID = fieldID; this.field = field; this.fieldObjectInspector = fieldObjectInspector; } + @Override public String getFieldName() { return field.getName().toLowerCase(); } + @Override public ObjectInspector getFieldObjectInspector() { return fieldObjectInspector; } + @Override + public int getFieldID() { + return fieldID; + } + + @Override public String getFieldComment() { return null; } @@ -123,7 +133,7 @@ void init(Class objectClass, for (int i = 0; i < reflectionFields.length; i++) { if (!shouldIgnoreField(reflectionFields[i].getName())) { reflectionFields[i].setAccessible(true); - fields.add(new MyField(reflectionFields[i], structFieldObjectInspectors + fields.add(new MyField(i, reflectionFields[i], structFieldObjectInspectors .get(used++))); } } diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StructField.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StructField.java index 67827d6..b6859b1 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StructField.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StructField.java @@ -37,6 +37,13 @@ ObjectInspector getFieldObjectInspector(); /** + * Get the fieldID for the field + * + * @return + */ + int getFieldID(); + + /** * Get the comment for the field. May be null if no comment provided. 
*/ String getFieldComment(); diff --git serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/UnionStructObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/UnionStructObjectInspector.java index 60e55ec..a81925b 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/UnionStructObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/UnionStructObjectInspector.java @@ -60,6 +60,10 @@ public ObjectInspector getFieldObjectInspector() { return structField.getFieldObjectInspector(); } + public int getFieldID() { + return structID; + } + public String getFieldComment() { return structField.getFieldComment(); } diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyPrimitive.java serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyPrimitive.java index 868dd4c..40a0a81 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyPrimitive.java +++ serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyPrimitive.java @@ -44,9 +44,7 @@ */ public static void initLazyObject(LazyObject lo, byte[] data, int start, int length) { - ByteArrayRef b = new ByteArrayRef(); - b.setData(data); - lo.init(b, start, length); + lo.init(data, start, length); } /** diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java index 69c891d..3f5491a 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java +++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java @@ -37,7 +37,6 @@ import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct; import org.apache.hadoop.hive.serde2.binarysortable.TestBinarySortableSerDe; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyBinary; import 
org.apache.hadoop.hive.serde2.lazy.LazyFactory; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.AbstractPrimitiveLazyObjectInspector; @@ -48,7 +47,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaBinaryObjectInspector; @@ -600,15 +598,12 @@ public void testWritableBinaryObjectInspector() throws Throwable { public void testLazyBinaryObjectInspector() throws Throwable { //create input ByteArrayRef - ByteArrayRef inpBARef = new ByteArrayRef(); - inpBARef.setData(inpBArray); - AbstractPrimitiveLazyObjectInspector binInspector = LazyPrimitiveObjectInspectorFactory .getLazyObjectInspector(TypeInfoFactory.binaryTypeInfo, false, (byte)0); //create LazyBinary initialed with inputBA LazyBinary lazyBin = (LazyBinary) LazyFactory.createLazyObject(binInspector); - lazyBin.init(inpBARef, 0, inpBArray.length); + lazyBin.init(inpBArray, 0, inpBArray.length); //use inspector to get a byte[] out of LazyBinary byte[] outBARef = (byte[]) binInspector.getPrimitiveJavaObject(lazyBin);