Index: serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestInnerStruct.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestInnerStruct.java (revision 799433) +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestInnerStruct.java (working copy) @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hive.serde2.binarysortable; public class MyTestInnerStruct { Index: serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java (revision 799433) +++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java (working copy) @@ -1,3 +1,20 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package org.apache.hadoop.hive.serde2.binarysortable; import java.util.List; @@ -12,4 +29,21 @@ String myString; MyTestInnerStruct myStruct; List myList; + + public MyTestClass() { + } + + public MyTestClass(Byte b, Short s, Integer i, Long l, + Float f, Double d, String st, MyTestInnerStruct is, + List li) { + this.myByte = b; + this.myShort = s; + this.myInt = i; + this.myLong = l; + this.myFloat = f; + this.myDouble = d; + this.myString = st; + this.myStruct = is; + this.myList = li; + } } Index: serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java (revision 0) +++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java (revision 0) @@ -0,0 +1,509 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.serde.Constants; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass; +import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct; +import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; +import org.apache.hadoop.io.BytesWritable; + +import junit.framework.TestCase; + +public class TestLazyBinarySerDe extends TestCase { + + /** + * Convert a byte array to a hex string + * @param bytes the byte array + * @return the hex string + */ + public String hexString(BytesWritable bytes) { + StringBuilder sb = new StringBuilder(); + for (int i=0; i getRandIntegerArray(Random r) { + int length = r.nextInt(10); + ArrayList result = new ArrayList(length); + for(int i=0; i getRandStructArray(Random r) { + int length = r.nextInt(10); + ArrayList result = new ArrayList(length); + for(int i=0; i entryinput: mp.entrySet()) { + boolean bEqual = false; + for (Map.Entry entryoutput: outputmp.entrySet()) { + // find the same key + if (0 == ObjectInspectorUtils.compare(entryoutput.getKey(), lazympkeyoi, entryinput.getKey(), inputmpkeyoi)) { + if(0 != ObjectInspectorUtils.compare(entryoutput.getValue(), lazympvalueoi, entryinput.getValue(), inputmpvalueoi)) { + assertEquals(entryoutput.getValue(), entryinput.getValue()); + } else { + bEqual = true; + } + break; + } + } + if(!bEqual) + throw new RuntimeException("Could not find matched key in deserialized map : " + entryinput.getKey()); + } + } + } + + /** + * The test entrance function + * @throws Throwable + */ + public void testLazyBinarySerDe() throws Throwable { + try { + + System.out.println("Beginning Test TestLazyBinarySerDe:"); + + // generate the data + int num = 1000; + Random r = new Random(1234); + MyTestClass rows[] = new MyTestClass[num]; + for (int i=0; i 0 ? null : Byte.valueOf((byte)r.nextInt()); + Short s = randField > 1 ? null : Short.valueOf((short)r.nextInt()); + Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt()); + Long l = randField > 3 ? null : Long.valueOf(r.nextLong()); + Float f = randField > 4 ? null : Float.valueOf(r.nextFloat()); + Double d = randField > 5 ? null : Double.valueOf(r.nextDouble()); + String st = randField > 6 ? null : getRandString(r); + MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2); + List li = randField > 8 ? null: getRandIntegerArray(r); + MyTestClass t = new MyTestClass(b,s,n,l,f,d,st,is,li); + rows[i] = t; + } + + StructObjectInspector rowOI = (StructObjectInspector)ObjectInspectorFactory + .getReflectionObjectInspector(MyTestClass.class, + ObjectInspectorOptions.JAVA); + + String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI); + String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI); + + // call the tests + // 1/ test LazyBinarySerDe + testLazyBinarySerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes)); + // 2/ test LazyBinaryMap + testLazyBinaryMap(r); + // 3/ test serialization and deserialization with different schemas + testShorterSchemaDeserialization(r); + // 4/ test serialization and deserialization with different schemas + testLongerSchemaDeserialization(r); + // 5/ test serialization and deserialization with different schemas + testShorterSchemaDeserialization1(r); + // 6/ test serialization and deserialization with different schemas + testLongerSchemaDeserialization1(r); + + System.out.println("Test TestLazyBinarySerDe passed!"); + } catch (Throwable e) { + e.printStackTrace(); + throw e; + } + } +} Index: serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java (revision 0) +++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java (revision 0) @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct; + +public class MyTestClassBigger { + Byte myByte; + Short myShort; + Integer myInt; + Long myLong; + Float myFloat; + Double myDouble; + String myString; + MyTestInnerStruct myStruct; + List myList; + Map> myMap; + + public MyTestClassBigger() { + } + + public MyTestClassBigger(Byte b, Short s, Integer i, Long l, + Float f, Double d, String st, MyTestInnerStruct is, + List li, Map> mp) { + this.myByte = b; + this.myShort = s; + this.myInt = i; + this.myLong = l; + this.myFloat = f; + this.myDouble = d; + this.myString = st; + this.myStruct = is; + this.myList = li; + this.myMap = mp; + } +} Index: serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java =================================================================== --- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java (revision 0) +++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java (revision 0) @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct; + +public class MyTestClassSmaller { + Byte myByte; + Short myShort; + Integer myInt; + Long myLong; + Float myFloat; + Double myDouble; + String myString; + MyTestInnerStruct myStruct; + + public MyTestClassSmaller() { + } + + public MyTestClassSmaller(Byte b, Short s, Integer i, Long l, + Float f, Double d, String st, MyTestInnerStruct is) { + this.myByte = b; + this.myShort = s; + this.myInt = i; + this.myLong = l; + this.myFloat = f; + this.myDouble = d; + this.myString = st; + this.myStruct = is; + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java (revision 799433) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java (working copy) @@ -39,8 +39,8 @@ * SerDe classes should call the static functions in this library to create an ObjectInspector * to return to the caller of SerDe2.getObjectInspector(). * - * The reason of having caches here is that ObjectInspector is because ObjectInspectors do - * not have an internal state - so ObjectInspectors with the same construction parameters should + * The reason of having caches here is that ObjectInspectors do not have an internal + * state - so ObjectInspectors with the same construction parameters should * result in exactly the same ObjectInspector. */ public class LazyObjectInspectorFactory { Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java (revision 799433) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java (working copy) @@ -226,7 +226,6 @@ /** * Get the value object with the index without checking parsed. * @param index The index into the array starting from 0 - * @param nullSequence The byte sequence representing the NULL value */ private LazyObject uncheckedGetValue(int index) { Text nullSequence = oi.getNullSequence(); @@ -252,7 +251,6 @@ /** * Get the key object with the index without checking parsed. * @param index The index into the array starting from 0 - * @param nullSequence The byte sequence representing the NULL value */ private LazyPrimitive uncheckedGetKey(int index) { Text nullSequence = oi.getNullSequence(); Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java (revision 0) @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.io.IntWritable; + +/** + * LazyBinaryObject for integer which is serialized as VInt + * @see LazyBinaryUtils#readVInt(byte[], int, VInt) + */ +public class LazyBinaryInteger extends LazyBinaryPrimitive { + + LazyBinaryInteger(WritableIntObjectInspector oi) { + super(oi); + data = new IntWritable(); + } + + LazyBinaryInteger(LazyBinaryInteger copy) { + super(copy); + data = new IntWritable(); + } + + /** + * The reusable vInt for decoding the integer + */ + VInt vInt = new LazyBinaryUtils.VInt(); + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + LazyBinaryUtils.readVInt(bytes.getData(), start, vInt); + assert(length == vInt.length); + data.set(vInt.value); + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java (revision 0) @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; + +/** + * LazyBinaryObject for short which takes two bytes. + */ +public class LazyBinaryShort extends LazyBinaryPrimitive { + + LazyBinaryShort(WritableShortObjectInspector oi) { + super(oi); + data = new ShortWritable(); + } + + LazyBinaryShort(LazyBinaryShort copy) { + super(copy); + data = new ShortWritable(); + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + assert(2 == length); + data.set(LazyBinaryUtils.byteArrayToShort(bytes.getData(), start)); + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java (revision 0) @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VLong; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import org.apache.hadoop.io.LongWritable; + +/** + * LazyBinaryObject for long which stores as VLong. + * @see LazyBinaryUtils#readVLong(byte[], int, VLong) + */ +public class LazyBinaryLong extends LazyBinaryPrimitive { + + LazyBinaryLong(WritableLongObjectInspector oi) { + super(oi); + data = new LongWritable(); + } + + LazyBinaryLong(LazyBinaryLong copy) { + super(copy); + data = new LongWritable(); + } + + /** + * The reusable vLong for decoding the long + */ + VLong vLong = new LazyBinaryUtils.VLong(); + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + LazyBinaryUtils.readVLong(bytes.getData(), start, vLong); + assert(length == vLong.length); + data.set(vLong.value); + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java (revision 0) @@ -0,0 +1,341 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import org.apache.hadoop.hive.serde2.ByteStream.Output; +import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.io.WritableUtils; + +public class LazyBinaryUtils { + + /** + * Convert the byte array to an int starting from the given offset. + * Refer to code by aeden on DZone Snippets: + * @param b the byte array + * @param offset the array offset + * @return the integer + */ + public static int byteArrayToInt(byte[] b, int offset) { + int value = 0; + for (int i = 0; i < 4; i++) { + int shift = (4 - 1 - i) * 8; + value += (b[i + offset] & 0x000000FF) << shift; + } + return value; + } + + /** + * Convert the byte array to a long starting from the given offset. + * @param b the byte array + * @param offset the array offset + * @return the long + */ + public static long byteArrayToLong(byte[] b, int offset) { + long value = 0; + for (int i = 0; i < 8; i++) { + int shift = (8 - 1 - i) * 8; + value += ((long) (b[i + offset] & 0x00000000000000FF)) << shift; + } + return value; + } + + /** + * Convert the byte array to a short starting from the given offset. + * @param b the byte array + * @param offset the array offset + * @return the short + */ + public static short byteArrayToShort(byte[] b, int offset) { + short value = 0; + value += (b[offset ] & 0x000000FF) << 8; + value += (b[offset+1] & 0x000000FF); + return value; + } + + /** + * Record is the unit that data is serialized in. + * A record includes two parts. The first part stores the + * size of the element and the second part stores the + * real element. + * size element + * record -> |----|-------------------------| + * + * A RecordInfo stores two information of a record, + * the size of the "size" part which is the element offset + * and the size of the element part which is element size. + */ + public static class RecordInfo { + public RecordInfo () { + elementOffset = 0; + elementSize = 0; + } + public byte elementOffset; + public int elementSize; + } + + /** + * Check a particular field and set its size and offset in bytes + * based on the field type and the bytes arrays. + * + * For void, boolean, byte, short, int, long, float and double, + * there is no offset and the size is fixed. For string, map, + * list, struct, the first four bytes are used to store the size. + * So the offset is 4 and the size is computed by concating the + * first four bytes together. The first four bytes are defined + * with respect to the offset in the bytes arrays. + * + * @param objectInspector object inspector of the field + * @param bytes bytes arrays store the table row + * @param offset offset of this field + * @param recordInfo modify this byteinfo object and return it + * @return size and offset in bytes of this field + */ + public static void checkObjectByteInfo(ObjectInspector objectInspector, byte[] bytes, int offset, RecordInfo recordInfo) { + Category category = objectInspector.getCategory(); + switch (category) { + case PRIMITIVE: + PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector)objectInspector).getPrimitiveCategory(); + switch (primitiveCategory) { + case VOID: + case BOOLEAN: + case BYTE: + recordInfo.elementOffset = 0; + recordInfo.elementSize = 1; + break; + case SHORT: + recordInfo.elementOffset = 0; + recordInfo.elementSize = 2; + break; + case FLOAT: + recordInfo.elementOffset = 0; + recordInfo.elementSize = 4; + break; + case DOUBLE: + recordInfo.elementOffset = 0; + recordInfo.elementSize = 8; + break; + case INT: + recordInfo.elementOffset = 0; + recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]); + break; + case LONG: + recordInfo.elementOffset = 0; + recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]); + break; + case STRING: + recordInfo.elementOffset = 4; + recordInfo.elementSize = LazyBinaryUtils.byteArrayToInt(bytes, offset); + break; + default: { + throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory); + } + } + break; + case LIST: + case MAP: + case STRUCT: + recordInfo.elementOffset = 4; + recordInfo.elementSize = LazyBinaryUtils.byteArrayToInt(bytes, offset); + break; + default : { + throw new RuntimeException("Unrecognized non-primitive type: " + category); + } + } + } + + /** + * A zero-compressed encoded long + * @see WritableUtils#readVLong(java.io.DataInput) + */ + public static class VLong { + public VLong() { + value = 0; + length = 0; + } + public long value; + public byte length; + }; + + /** + * Reads a zero-compressed encoded long from a byte array and returns it. + * @param bytes the byte array + * @param offset offset of the array to read from + * @param vlong storing the deserialized long and its size in byte + * @see WritableUtils#readVLong(java.io.DataInput) + */ + public static void readVLong(byte[] bytes, int offset, VLong vlong) { + byte firstByte = bytes[offset]; + vlong.length = (byte)WritableUtils.decodeVIntSize(firstByte); + if (vlong.length == 1) { + vlong.value = firstByte; + return; + } + long i = 0; + for (int idx = 0; idx < vlong.length-1; idx++) { + byte b = bytes[offset+1+idx]; + i = i << 8; + i = i | (b & 0xFF); + } + vlong.value = (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1L) : i); + } + + /** + * A zero-compressed encoded integer + * @see WritableUtils#readVInt(java.io.DataInput) + */ + public static class VInt { + public VInt() { + value = 0; + length = 0; + } + public int value; + public byte length; + }; + + /** + * Reads a zero-compressed encoded int from a byte array and returns it. + * @param bytes the byte array + * @param offset offset of the array to read from + * @param vint storing the deserialized int and its size in byte + * @see WritableUtils#readVInt(java.io.DataInput) + */ + public static void readVInt(byte[] bytes, int offset, VInt vInt) { + byte firstByte = bytes[offset]; + vInt.length = (byte)WritableUtils.decodeVIntSize(firstByte); + if (vInt.length == 1) { + vInt.value = firstByte; + return; + } + int i = 0; + for (int idx = 0; idx < vInt.length-1; idx++) { + byte b = bytes[offset+1+idx]; + i = i << 8; + i = i | (b & 0xFF); + } + vInt.value = (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1) : i); + } + + /** + * Writes a zero-compressed encoded int to a byte array. + * @param byteStream the byte array/stream + * @param i the int + * @see LazyBinaryUtils#writeVLong(Output, long) + */ + public static void writeVInt(Output byteStream, int i) { + writeVLong(byteStream, i); + } + + /** + * Write a zero-compressed encoded long to a byte array. + * @param byteStream the byte array/stream + * @param l the long + * @see WritableUtils#writeVLong(java.io.DataOutput, long) + */ + public static void writeVLong(Output byteStream, long l) { + if (l >= -112 && l <= 127) { + byteStream.write((byte)l); + return; + } + + int len = -112; + if (l < 0) { + l ^= -1L; // take one's complement' + len = -120; + } + + long tmp = l; + while (tmp != 0) { + tmp = tmp >> 8; + len--; + } + + byteStream.write((byte)len); + + len = (len < -120) ? -(len + 120) : -(len + 112); + + for (int idx = len; idx != 0; idx--) { + int shiftbits = (idx - 1) * 8; + long mask = 0xFFL << shiftbits; + byteStream.write((byte)((l & mask) >> shiftbits)); + } + } + + static HashMap cachedLazyBinaryObjectInspector = new HashMap(); + + /** + * Returns the lazy binary object inspector that can be used to translate an object of that typeInfo + * to a standard object type. + * + * For primitive types, we use the standard writable object inspector. + */ + public static ObjectInspector getLazyBinaryObjectInspectorFromTypeInfo(TypeInfo typeInfo) { + ObjectInspector result = cachedLazyBinaryObjectInspector.get(typeInfo); + if (result == null) { + switch(typeInfo.getCategory()) { + case PRIMITIVE: { + result = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + ((PrimitiveTypeInfo)typeInfo).getPrimitiveCategory()); + break; + } + case LIST: { + ObjectInspector elementObjectInspector = getLazyBinaryObjectInspectorFromTypeInfo( + ((ListTypeInfo)typeInfo).getListElementTypeInfo()); + result = LazyBinaryObjectInspectorFactory.getLazyBinaryListObjectInspector(elementObjectInspector); + break; + } + case MAP: { + MapTypeInfo mapTypeInfo = (MapTypeInfo)typeInfo; + ObjectInspector keyObjectInspector = getLazyBinaryObjectInspectorFromTypeInfo(mapTypeInfo.getMapKeyTypeInfo()); + ObjectInspector valueObjectInspector = getLazyBinaryObjectInspectorFromTypeInfo(mapTypeInfo.getMapValueTypeInfo()); + result = LazyBinaryObjectInspectorFactory.getLazyBinaryMapObjectInspector(keyObjectInspector, valueObjectInspector); + break; + } + case STRUCT: { + StructTypeInfo structTypeInfo = (StructTypeInfo)typeInfo; + List fieldNames = structTypeInfo.getAllStructFieldNames(); + List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos(); + List fieldObjectInspectors = new ArrayList(fieldTypeInfos.size()); + for(int i=0; i columnNames; + List columnTypes; + + TypeInfo rowTypeInfo; + ObjectInspector cachedObjectInspector; + + // The object for storing row data + LazyBinaryStruct cachedLazyBinaryStruct; + + /** + * Initialize the SerDe with configuration and table information + * @see SerDe#initialize(Configuration, Properties) + */ + @Override + public void initialize(Configuration conf, Properties tbl) + throws SerDeException { + // Get column names and types + String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS); + String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES); + if (columnNameProperty.length() == 0) { + columnNames = new ArrayList(); + } else { + columnNames = Arrays.asList(columnNameProperty.split(",")); + } + if (columnTypeProperty.length() == 0) { + columnTypes = new ArrayList(); + } else { + columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty); + } + assert(columnNames.size() == columnTypes.size()); + // Create row related objects + rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes); + // Create the object inspector and the lazy binary struct object + cachedObjectInspector = LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(rowTypeInfo); + cachedLazyBinaryStruct = (LazyBinaryStruct) LazyBinaryFactory + .createLazyBinaryObject(cachedObjectInspector); + // output debug info + LOG.debug("LazyBinarySerDe initialized with: columnNames=" + + columnNames + " columnTypes=" + columnTypes); + } + + /** + * Returns the ObjectInspector for the row. + * @see Deserializer#getObjectInspector() + */ + @Override + public ObjectInspector getObjectInspector() throws SerDeException { + return cachedObjectInspector; + } + + /** + * Returns the Writable Class after serialization. + * @see Serializer#getSerializedClass() + */ + @Override + public Class getSerializedClass() { + return BytesWritable.class; + } + + // The wrapper for byte array + ByteArrayRef byteArrayRef; + + /** + * Deserialize a table record to a lazybinary struct. + * @see Deserializer#deserialize(Writable) + */ + @Override + public Object deserialize(Writable field) throws SerDeException { + if (byteArrayRef == null) { + byteArrayRef = new ByteArrayRef(); + } + if (field instanceof BytesWritable) { + BytesWritable b = (BytesWritable)field; + // For backward-compatibility with hadoop 0.17 + byteArrayRef.setData(b.get()); + cachedLazyBinaryStruct.init(byteArrayRef, 0, b.getSize()); + } else if (field instanceof Text) { + Text t = (Text)field; + byteArrayRef.setData(t.getBytes()); + cachedLazyBinaryStruct.init(byteArrayRef, 0, t.getLength()); + } else { + throw new SerDeException(getClass().toString() + + ": expects either BytesWritable or Text object!"); + } + return cachedLazyBinaryStruct; + } + + /** + * The reusable output buffer and serialize byte buffer. + */ + BytesWritable serializeBytesWritable = new BytesWritable(); + ByteStream.Output serializeByteStream = new ByteStream.Output(); + + /** + * Serialize an object to a byte buffer in a binary compact way. + * @see Serializer#serialize(Object, ObjectInspector) + */ + @Override + public Writable serialize(Object obj, ObjectInspector objInspector) + throws SerDeException { + // make sure it is a struct record + if (objInspector.getCategory() != Category.STRUCT) { + throw new SerDeException(getClass().toString() + + " can only serialize struct types, but we got: " + + objInspector.getTypeName()); + } + + serializeByteStream.reset(); + /* + * Interleave serializing one null byte and 8 struct fields + * in each round, in order to support data deserialization + * with different table schemas + */ + StructObjectInspector soi = (StructObjectInspector)objInspector; + List fields = soi.getAllStructFieldRefs(); + int size = fields.size(); + int lasti = 0; + byte nullByte = 0; + for (int i=0; i> 8)); + byteStream.write((byte) (v)); + return; + } + case INT: { + IntObjectInspector ioi = (IntObjectInspector)poi; + int v = ioi.get(obj); + LazyBinaryUtils.writeVInt(byteStream, v); + return; + } + case LONG: { + LongObjectInspector loi = (LongObjectInspector)poi; + long v = loi.get(obj); + LazyBinaryUtils.writeVLong(byteStream, v); + return; + } + case FLOAT: { + FloatObjectInspector foi = (FloatObjectInspector)poi; + int v = Float.floatToIntBits(foi.get(obj)); + byteStream.write((byte) (v >> 24)); + byteStream.write((byte) (v >> 16)); + byteStream.write((byte) (v >> 8)); + byteStream.write((byte) (v)); + return; + } + case DOUBLE: { + DoubleObjectInspector doi = (DoubleObjectInspector)poi; + long v = Double.doubleToLongBits(doi.get(obj)); + byteStream.write((byte) (v >> 56)); + byteStream.write((byte) (v >> 48)); + byteStream.write((byte) (v >> 40)); + byteStream.write((byte) (v >> 32)); + byteStream.write((byte) (v >> 24)); + byteStream.write((byte) (v >> 16)); + byteStream.write((byte) (v >> 8)); + byteStream.write((byte) (v)); + return; + } + case STRING: { + StringObjectInspector soi = (StringObjectInspector)poi; + Text t = soi.getPrimitiveWritableObject(obj); + /* write byte size of the string which is a integer of four bytes */ + int length = t.getLength(); + byteStream.write((byte) (length >> 24)); + byteStream.write((byte) (length >> 16)); + byteStream.write((byte) (length >> 8)); + byteStream.write((byte) (length)); + /** + * no need to store size of string because it + * is the same as its byte size. Just write + * the string itself + */ + byte[] data = t.getBytes(); + byteStream.write(data, 0, length); + return; + } + default: { + throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory()); + } + } + } + case LIST: { + ListObjectInspector loi = (ListObjectInspector)objInspector; + ObjectInspector eoi = loi.getListElementObjectInspector(); + + // 1/ reserve spaces for the byte size of the list + // which is a integer and takes four bytes + int byteSizeStart = byteStream.getCount(); + byteStream.write((byte) 0); + byteStream.write((byte) 0); + byteStream.write((byte) 0); + byteStream.write((byte) 0); + int listStart = byteStream.getCount(); + + // 2/ write the size of the list as a VInt + int size = loi.getListLength(obj); + LazyBinaryUtils.writeVInt(byteStream, size); + + // 3/ write the null bytes + byte nullByte = 0; + for (int eid = 0; eid < size; eid++) { + // set the bit to 1 if an element is not null + if (null != loi.getListElement(obj, eid)) { + nullByte |= 1 << (eid%8); + } + // store the byte every eight elements or + // if this is the last element + if (7 == eid%8 || eid == size-1) { + byteStream.write(nullByte); + nullByte = 0; + } + } + + // 4/ write element by element from the list + for (int eid = 0; eid < size; eid++) { + serialize(byteStream, loi.getListElement(obj, eid), eoi); + } + + // 5/ update the list byte size + int listEnd = byteStream.getCount(); + int listSize = listEnd - listStart; + byte [] bytes = byteStream.getData(); + bytes[byteSizeStart ] = (byte) (listSize >> 24); + bytes[byteSizeStart + 1] = (byte) (listSize >> 16); + bytes[byteSizeStart + 2] = (byte) (listSize >> 8); + bytes[byteSizeStart + 3] = (byte) (listSize); + + return; + } + case MAP: { + MapObjectInspector moi = (MapObjectInspector)objInspector; + ObjectInspector koi = moi.getMapKeyObjectInspector(); + ObjectInspector voi = moi.getMapValueObjectInspector(); + Map map = moi.getMap(obj); + + // 1/ reserve spaces for the byte size of the map + // which is a integer and takes four bytes + int byteSizeStart = byteStream.getCount(); + byteStream.write((byte) 0); + byteStream.write((byte) 0); + byteStream.write((byte) 0); + byteStream.write((byte) 0); + int mapStart = byteStream.getCount(); + + // 2/ write the size of the map which is a VInt + int size = map.size(); + LazyBinaryUtils.writeVInt(byteStream, size); + + // 3/ write the null bytes + int b = 0; + byte nullByte = 0; + for (Map.Entry entry: map.entrySet()) { + // set the bit to 1 if a key is not null + if (null != entry.getKey()) { + nullByte |= 1 << (b%8); + } else if (!nullMapKey) { + nullMapKey = true; + LOG.warn("Null map key encountered! Ignoring similar problems."); + } + b++; + // set the bit to 1 if a value is not null + if (null != entry.getValue()) { + nullByte |= 1 << (b%8); + } + b++; + // write the byte to stream every 4 key-value pairs + // or if this is the last key-value pair + if (0 == b%8 || b == size*2) { + byteStream.write(nullByte); + nullByte = 0; + } + } + + // 4/ write key-value pairs one by one + for(Map.Entry entry: map.entrySet()) { + serialize(byteStream, entry.getKey(), koi); + serialize(byteStream, entry.getValue(), voi); + } + + // 5/ update the byte size of the map + int mapEnd = byteStream.getCount(); + int mapSize = mapEnd - mapStart; + byte [] bytes = byteStream.getData(); + bytes[byteSizeStart ] = (byte) (mapSize >> 24); + bytes[byteSizeStart + 1] = (byte) (mapSize >> 16); + bytes[byteSizeStart + 2] = (byte) (mapSize >> 8); + bytes[byteSizeStart + 3] = (byte) (mapSize); + + return; + } + case STRUCT: { + StructObjectInspector soi = (StructObjectInspector)objInspector; + List fields = soi.getAllStructFieldRefs(); + + // 1/ reserve spaces for the byte size of the struct + // which is a integer and takes four bytes + int byteSizeStart = byteStream.getCount(); + byteStream.write((byte) 0); + byteStream.write((byte) 0); + byteStream.write((byte) 0); + byteStream.write((byte) 0); + int structStart = byteStream.getCount(); + + // 2/ Interleave serializing one null byte and 8 struct fields + // in each round, in order to support data deserialization + // with a larger table schema + int size = fields.size(); + int lasti = 0; + byte nullByte = 0; + for (int i=0; i> 24); + bytes[byteSizeStart + 1] = (byte) (structSize >> 16); + bytes[byteSizeStart + 2] = (byte) (structSize >> 8); + bytes[byteSizeStart + 3] = (byte) (structSize); + + return; + } + default: { + throw new RuntimeException("Unrecognized type: " + objInspector.getCategory()); + } + } + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java (revision 0) @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.io.ByteWritable; + +/** + * LazyBinaryObject for byte which takes one byte + */ +public class LazyBinaryByte extends LazyBinaryPrimitive { + + LazyBinaryByte(LazyBinaryByte copy) { + super(copy); + data = new ByteWritable(); + } + + LazyBinaryByte(WritableByteObjectInspector oi) { + super(oi); + data = new ByteWritable(); + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + assert(1 == length); + data.set(bytes.getData()[start]); + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java (revision 0) @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.io.FloatWritable; + +/** + * LazyBinaryObject for float which takes four bytes. + */ +public class LazyBinaryFloat extends LazyBinaryPrimitive { + + LazyBinaryFloat(WritableFloatObjectInspector oi) { + super(oi); + data = new FloatWritable(); + } + + LazyBinaryFloat(LazyBinaryFloat copy) { + super(copy); + data = new FloatWritable(); + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + assert (4 == length); + data.set(Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes.getData(), start))); + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java (revision 0) @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + +/** + * LazyBinaryObject stores an object in a binary format in a byte[]. + * For example, a double takes four bytes. + * + * A LazyBinaryObject can represent any primitive object or hierarchical object + * like string, list, map or struct. + */ +public abstract class LazyBinaryObject { + + OI oi; + + /** + * Create a LazyBinaryObject. + * @param oi Derived classes can access meta information about this Lazy + * Binary Object (e.g, length, null-bits) from it. + */ + protected LazyBinaryObject(OI oi) { + this.oi = oi; + } + + /** + * Set the data for this LazyBinaryObject. + * We take ByteArrayRef instead of byte[] so that we will be able to drop + * the reference to byte[] by a single assignment. + * The ByteArrayRef object can be reused across multiple rows. + * + * Never call this function if the object represent a null!!! + * + * @param bytes The wrapper of the byte[]. + * @param start The start position inside the bytes. + * @param length The length of the data, starting from "start" + * @see ByteArrayRef + */ + public abstract void init(ByteArrayRef bytes, int start, int length); + + /** + * If the LazyBinaryObject is a primitive Object, then deserialize it and return + * the actual primitive Object. + * Otherwise (string, list, map, struct), return this. + */ + public abstract Object getObject(); +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java (revision 0) @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import org.apache.hadoop.io.Text; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; + +/** + * The serialization of LazyBinaryString is very simple: + * start A end + * bytes[] -> |---------------------------------| + * + * Section A is just an array of bytes which are exactly + * the Text contained in this object. + * + */ +public class LazyBinaryString extends LazyBinaryPrimitive { + + LazyBinaryString(WritableStringObjectInspector OI) { + super(OI); + data = new Text(); + } + + public LazyBinaryString(LazyBinaryString copy) { + super(copy); + data = new Text(copy.data); + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + assert(length > -1); + data.set(bytes.getData(), start, length); + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java (revision 0) @@ -0,0 +1,91 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector; +import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector; +import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; + +public class LazyBinaryFactory { + + /** + * Create a lazy binary primitive class given the type name. + */ + public static LazyBinaryPrimitive createLazyBinaryPrimitiveClass(PrimitiveObjectInspector oi) { + PrimitiveCategory p = oi.getPrimitiveCategory(); + switch(p) { + case BOOLEAN: { + return new LazyBinaryBoolean((WritableBooleanObjectInspector)oi); + } + case BYTE: { + return new LazyBinaryByte((WritableByteObjectInspector)oi); + } + case SHORT: { + return new LazyBinaryShort((WritableShortObjectInspector)oi); + } + case INT: { + return new LazyBinaryInteger((WritableIntObjectInspector)oi); + } + case LONG: { + return new LazyBinaryLong((WritableLongObjectInspector)oi); + } + case FLOAT: { + return new LazyBinaryFloat((WritableFloatObjectInspector)oi); + } + case DOUBLE: { + return new LazyBinaryDouble((WritableDoubleObjectInspector)oi); + } + case STRING: { + return new LazyBinaryString((WritableStringObjectInspector)oi); + } + default: { + throw new RuntimeException("Internal error: no LazyBinaryObject for " + p); + } + } + } + + /** + * Create a hierarchical LazyBinaryObject based on the given typeInfo. + */ + public static LazyBinaryObject createLazyBinaryObject(ObjectInspector oi) { + ObjectInspector.Category c = oi.getCategory(); + switch(c) { + case PRIMITIVE: + return createLazyBinaryPrimitiveClass((PrimitiveObjectInspector)oi); + case MAP: + return new LazyBinaryMap((LazyBinaryMapObjectInspector)oi); + case LIST: + return new LazyBinaryArray((LazyBinaryListObjectInspector)oi); + case STRUCT: + return new LazyBinaryStruct((LazyBinaryStructObjectInspector)oi); + } + + throw new RuntimeException("Hive LazyBinarySerDe Internal error."); + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryMapObjectInspector.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryMapObjectInspector.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryMapObjectInspector.java (revision 0) @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary.objectinspector; + +import java.util.Map; + +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector; + +/** + * ObjectInspector for LazyBinaryMap + * @see LazyBinaryMap + */ +public class LazyBinaryMapObjectInspector extends StandardMapObjectInspector { + + protected LazyBinaryMapObjectInspector(ObjectInspector mapKeyObjectInspector, + ObjectInspector mapValueObjectInspector) { + super(mapKeyObjectInspector, mapValueObjectInspector); + } + + @Override + public Map getMap(Object data) { + if (data == null) { + return null; + } + return ((LazyBinaryMap)data).getMap(); + } + + @Override + public int getMapSize(Object data) { + if (data == null) { + return -1; + } + return ((LazyBinaryMap)data).getMapSize(); + } + + @Override + public Object getMapValueElement(Object data, Object key) { + if (data == null) { + return -1; + } + return ((LazyBinaryMap)data).getMapValueElement(key); + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryStructObjectInspector.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryStructObjectInspector.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryStructObjectInspector.java (revision 0) @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary.objectinspector; + +import java.util.List; + +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; + +/** + * ObjectInspector for LazyBinaryStruct + * @see LazyBinaryStruct + */ +public class LazyBinaryStructObjectInspector extends StandardStructObjectInspector { + + protected LazyBinaryStructObjectInspector(List structFieldNames, List structFieldObjectInspectors) { + super(structFieldNames, structFieldObjectInspectors); + } + + protected LazyBinaryStructObjectInspector(List fields) { + super(fields); + } + + @Override + public Object getStructFieldData(Object data, StructField fieldRef) { + if (data == null) { + return null; + } + LazyBinaryStruct struct = (LazyBinaryStruct)data; + MyField f = (MyField) fieldRef; + + int fieldID = f.getFieldID(); + assert(fieldID >= 0 && fieldID < fields.size()); + + return struct.getField(fieldID); + } + + @Override + public List getStructFieldsDataAsList(Object data) { + if (data == null) { + return null; + } + LazyBinaryStruct struct = (LazyBinaryStruct)data; + return struct.getFieldsAsList(); + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java (revision 0) @@ -0,0 +1,66 @@ +package org.apache.hadoop.hive.serde2.lazybinary.objectinspector; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; + +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + +/** + * ObjectInspectorFactory is the primary way to create new ObjectInspector instances. + * + * SerDe classes should call the static functions in this library to create an ObjectInspector + * to return to the caller of SerDe2.getObjectInspector(). + * + * The reason of having caches here is that ObjectInspectors do not have an internal + * state - so ObjectInspectors with the same construction parameters should + * result in exactly the same ObjectInspector. + */ + +public class LazyBinaryObjectInspectorFactory { + + static HashMap, LazyBinaryStructObjectInspector> cachedLazyBinaryStructObjectInspector = + new HashMap, LazyBinaryStructObjectInspector>(); + public static LazyBinaryStructObjectInspector getLazyBinaryStructObjectInspector(List structFieldNames, + List structFieldObjectInspectors) { + ArrayList signature = new ArrayList(); + signature.add(structFieldNames); + signature.add(structFieldObjectInspectors); + LazyBinaryStructObjectInspector result = cachedLazyBinaryStructObjectInspector.get(signature); + if (result == null) { + result = new LazyBinaryStructObjectInspector(structFieldNames, structFieldObjectInspectors); + cachedLazyBinaryStructObjectInspector.put(signature, result); + } + return result; + } + + static HashMap, LazyBinaryListObjectInspector> cachedLazyBinaryListObjectInspector = + new HashMap, LazyBinaryListObjectInspector>(); + public static LazyBinaryListObjectInspector getLazyBinaryListObjectInspector( + ObjectInspector listElementObjectInspector) { + ArrayList signature = new ArrayList(); + signature.add(listElementObjectInspector); + LazyBinaryListObjectInspector result = cachedLazyBinaryListObjectInspector.get(signature); + if (result == null) { + result = new LazyBinaryListObjectInspector(listElementObjectInspector); + cachedLazyBinaryListObjectInspector.put(signature, result); + } + return result; + } + + static HashMap, LazyBinaryMapObjectInspector> cachedLazyBinaryMapObjectInspector = + new HashMap, LazyBinaryMapObjectInspector>(); + public static LazyBinaryMapObjectInspector getLazyBinaryMapObjectInspector( + ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector) { + ArrayList signature = new ArrayList(); + signature.add(mapKeyObjectInspector); + signature.add(mapValueObjectInspector); + LazyBinaryMapObjectInspector result = cachedLazyBinaryMapObjectInspector.get(signature); + if (result == null) { + result = new LazyBinaryMapObjectInspector(mapKeyObjectInspector, + mapValueObjectInspector); + cachedLazyBinaryMapObjectInspector.put(signature, result); + } + return result; + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryListObjectInspector.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryListObjectInspector.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryListObjectInspector.java (revision 0) @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary.objectinspector; + +import java.util.List; + +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray; +import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + +/** + * ObjectInspector for LazyBinaryList + * @see LazyBinaryList + */ +public class LazyBinaryListObjectInspector extends StandardListObjectInspector { + + protected LazyBinaryListObjectInspector( + ObjectInspector listElementObjectInspector) { + super(listElementObjectInspector); + } + + @Override + public List getList(Object data) { + if (data == null) { + return null; + } + LazyBinaryArray array = (LazyBinaryArray) data; + return array.getList(); + } + + @Override + public Object getListElement(Object data, int index) { + if (data == null) { + return null; + } + LazyBinaryArray array = (LazyBinaryArray) data; + return array.getListElementObject(index); + } + + @Override + public int getListLength(Object data) { + if (data == null) { + return -1; + } + LazyBinaryArray array = (LazyBinaryArray) data; + return array.getListLength(); + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java (revision 0) @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.io.Writable; +/** + * Defines a LazyBianryPrimitive. + * + * {@data} will be initialized to corresponding types in + * different LazyBinary primitive classes. For example, {@data} will + * be a BooleanWritable in the LazyBinaryBoolean class. + * + * There is no null flag any more, + * @see {@link LazyBinaryObject#init(ByteArrayRef, int, int)} + * + */ +public abstract class LazyBinaryPrimitive extends LazyBinaryObject { + + LazyBinaryPrimitive(OI oi) { + super(oi); + } + + LazyBinaryPrimitive(LazyBinaryPrimitive copy) { + super(copy.oi); + } + + T data; + + /** + * Returns the primitive object represented by this LazyBinaryObject. + * This is useful because it can make sure we have "null" for null objects. + */ + public Object getObject() { + return data; + } + + public T getWritableObject() { + return data; + } + + public String toString() { + return data.toString(); + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java (revision 0) @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + + +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector; +import org.apache.hadoop.hive.serde2.io.DoubleWritable; + +/** + * LazyBinaryObject for double which takes eight bytes + */ +public class LazyBinaryDouble extends LazyBinaryPrimitive { + + LazyBinaryDouble(WritableDoubleObjectInspector oi) { + super(oi); + data = new DoubleWritable(); + } + + LazyBinaryDouble(LazyBinaryDouble copy) { + super(copy); + data = new DoubleWritable(); + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + assert(8 == length); + data.set(Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes.getData(), start))); + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java (revision 0) @@ -0,0 +1,333 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyObject; +import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; +import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; + +/** + * LazyBinaryMap is serialized as follows: + * start A b c b c b c end + * bytes[] -> |--------|---|---|---|---| ... |---|---| + * + * Section A is the null-bytes. Suppose the map has + * N key-value pairs, then there are (N*2+7)/8 bytes used as null-bytes. + * Each bit corresponds to a key or a value and it indicates whether + * that key or value is null (0) or not null (1). + * + * After A, all the bytes are actual serialized data of the map, + * which are key-value pairs. b represent the keys and c represent + * the values. Each of them is again a LazyBinaryObject. + * + */ + +public class LazyBinaryMap extends LazyBinaryNonPrimitive { + + private static Log LOG = LogFactory.getLog(LazyBinaryMap.class.getName()); + + /** + * Whether the data is already parsed or not. + */ + boolean parsed; + + /** + * The size of the map. + * Only valid when the data is parsed. + * -1 when the map is NULL. + */ + int mapSize = 0; + + /** + * The beginning position and length of key[i] and value[i]. + * Only valid when the data is parsed. + */ + int[] keyStart; + int[] keyLength; + int[] valueStart; + int[] valueLength; + /** + * Whether valueObjects[i]/keyObjects[i] is initialized or not. + */ + boolean[] keyInited; + boolean[] valueInited; + + /** + * Whether valueObjects[i]/keyObjects[i] is null or not + * This could not be inferred from the length of the object. + * In particular, a 0-length string is not null. + */ + boolean[] keyIsNull; + boolean[] valueIsNull; + + /** + * The keys are stored in an array of LazyPrimitives. + */ + LazyBinaryPrimitive[] keyObjects; + /** + * The values are stored in an array of LazyObjects. + * value[index] will start from KeyEnd[index] + 1, + * and ends before KeyStart[index+1] - 1. + */ + LazyBinaryObject[] valueObjects; + + protected LazyBinaryMap(LazyBinaryMapObjectInspector oi) { + super(oi); + } + + /** + * Set the row data for this LazyBinaryMap. + * @see LazyBinaryObject#init(ByteArrayRef, int, int) + */ + @Override + public void init(ByteArrayRef bytes, int start, int length) { + super.init(bytes, start, length); + parsed = false; + } + + /** + * Adjust the size of arrays: + * keyStart, keyLength + * valueStart, valueLength + * keyInited, keyIsNull + * valueInited, valueIsNull + */ + protected void adjustArraySize(int newSize) { + if (keyStart == null || keyStart.length < newSize) { + keyStart = new int[newSize]; + keyLength = new int[newSize]; + valueStart = new int[newSize]; + valueLength = new int[newSize]; + keyInited = new boolean[newSize]; + keyIsNull = new boolean[newSize]; + valueInited = new boolean[newSize]; + valueIsNull = new boolean[newSize]; + keyObjects = new LazyBinaryPrimitive[newSize]; + valueObjects = new LazyBinaryObject[newSize]; + } + } + + boolean nullMapKey = false; + VInt vInt = new LazyBinaryUtils.VInt(); + RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo(); + + /** + * Parse the byte[] and fill keyStart, keyLength, keyIsNull + * valueStart, valueLength and valueIsNull + */ + private void parse() { + + byte[] bytes = this.bytes.getData(); + + // get the VInt that represents the map size + LazyBinaryUtils.readVInt(bytes, start, vInt); + mapSize = vInt.value; + if(0 == mapSize) { + parsed = true; + return; + } + + // adjust arrays + adjustArraySize(mapSize); + + // find out the null-bytes + int mapByteStart = start + vInt.length; + int nullByteCur = mapByteStart; + int nullByteEnd = mapByteStart + (mapSize*2+7) / 8; + int lastElementByteEnd = nullByteEnd; + + // parsing the keys and values one by one + for (int i=0; i lazyKeyI = uncheckedGetKey(i); + if (lazyKeyI == null) continue; + // getWritableObject() will convert LazyPrimitive to actual primitive writable objects. + Object keyI = lazyKeyI.getWritableObject(); + if (keyI == null) continue; + if (keyI.equals(key)) { + // Got a match, return the value + LazyBinaryObject v = uncheckedGetValue(i); + return v == null ? v : v.getObject(); + } + } + return null; + } + + + /** + * Get the key object with the index without checking parsed. + * @param index The index into the array starting from 0 + */ + private LazyBinaryPrimitive uncheckedGetKey(int index) { + if (keyIsNull[index]) { + return null; + } + if (!keyInited[index]) { + keyInited[index] = true; + if (keyObjects[index] == null) { + // Keys are always primitive + keyObjects[index] = LazyBinaryFactory.createLazyBinaryPrimitiveClass( + (PrimitiveObjectInspector)((MapObjectInspector)oi).getMapKeyObjectInspector()); + } + keyObjects[index].init(bytes, keyStart[index], keyLength[index]); + } + return keyObjects[index]; + } + + /** + * cachedMap is reused for different calls to getMap(). + * But each LazyBinaryMap has a separate cachedMap so we won't overwrite the + * data by accident. + */ + LinkedHashMap cachedMap; + + /** + * Return the map object representing this LazyBinaryMap. + * Note that the keyObjects will be Writable primitive objects. + * @return the map object + */ + public Map getMap() { + if (!parsed) { + parse(); + } + if (cachedMap == null) { + // Use LinkedHashMap to provide deterministic order + cachedMap = new LinkedHashMap(); + } else { + cachedMap.clear(); + } + + // go through each element of the map + for (int i = 0; i < mapSize; i++) { + LazyBinaryPrimitive lazyKey = uncheckedGetKey(i); + if (lazyKey == null) continue; + Object key = lazyKey.getObject(); + // do not overwrite if there are duplicate keys + if (key != null && !cachedMap.containsKey(key)) { + LazyBinaryObject lazyValue = uncheckedGetValue(i); + Object value = (lazyValue == null ? null : lazyValue.getObject()); + cachedMap.put(key, value); + } + } + return cachedMap; + } + + /** + * Get the size of the map represented by this LazyBinaryMap. + * @return The size of the map + */ + public int getMapSize() { + if (!parsed) { + parse(); + } + return mapSize; + } +} \ No newline at end of file Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java (revision 0) @@ -0,0 +1,237 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazy.LazyObject; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt; +import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; + +/** + * LazyBinaryArray is serialized as follows: + * start A b b b b b b end + * bytes[] -> |--------|---|---|---|---| ... |---|---| + * + * Section A is the null-bytes. Suppose the list has + * N elements, then there are (N+7)/8 bytes used as null-bytes. + * Each bit corresponds to an element and it indicates whether + * that element is null (0) or not null (1). + * + * After A, all b(s) represent the elements of the list. + * Each of them is again a LazyBinaryObject. + * + */ + +public class LazyBinaryArray extends LazyBinaryNonPrimitive { + + /** + * Whether the data is already parsed or not. + */ + boolean parsed = false; + /** + * The length of the array. + * Only valid when the data is parsed. + */ + int arraySize = 0; + + /** + * The start positions and lengths of array elements. + * Only valid when the data is parsed. + */ + int[] elementStart; + int[] elementLength; + + /** + * Whether an element is initialized or not + */ + boolean[] elementInited; + + /** + * Whether an element is null or not. + * Because length is 0 does not means the field is null. + * In particular, a 0-length string is not null. + */ + boolean[] elementIsNull; + + /** + * The elements of the array. Note that we call + * arrayElements[i].init(bytes, begin, length) + * only when that element is accessed. + */ + LazyBinaryObject[] arrayElements; + + /** + * Construct a LazyBinaryArray object with the ObjectInspector. + * @param oi the oi representing the type of this LazyBinaryArray + */ + protected LazyBinaryArray(LazyBinaryListObjectInspector oi) { + super(oi); + } + + /** + * Set the row data for this LazyBinaryArray. + * @see LazyObject#init(ByteArrayRef, int, int) + */ + @Override + public void init(ByteArrayRef bytes, int start, int length) { + super.init(bytes, start, length); + parsed = false; + } + + /** + * Enlarge the size of arrays storing information for the elements inside + * the array. + */ + private void adjustArraySize(int newSize) { + if (elementStart == null || elementStart.length < newSize) { + elementStart = new int[newSize]; + elementLength = new int[newSize]; + elementInited = new boolean[newSize]; + elementIsNull = new boolean[newSize]; + arrayElements = new LazyBinaryObject[newSize]; + } + } + + VInt vInt = new LazyBinaryUtils.VInt(); + RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo(); + + /** + * Parse the bytes and fill elementStart, elementLength, + * elementInited and elementIsNull. + */ + private void parse() { + + byte[] bytes = this.bytes.getData(); + + // get the vlong that represents the map size + LazyBinaryUtils.readVInt(bytes, start, vInt); + arraySize = vInt.value; + if(0 == arraySize) { + parsed = true; + return; + } + + // adjust arrays + adjustArraySize(arraySize); + // find out the null-bytes + int arryByteStart = start + vInt.length; + int nullByteCur = arryByteStart; + int nullByteEnd = arryByteStart + (arraySize+7) / 8; + // the begin the real elements + int lastElementByteEnd = nullByteEnd; + // parsing elements one by one + for (int i=0; i= arraySize) { + return null; + } + return uncheckedGetElement(index); + } + + /** + * Get the element without checking out-of-bound index. + * @param index index to the array element + */ + private Object uncheckedGetElement(int index) { + + if (elementIsNull[index]) { + return null; + } else { + if (!elementInited[index]) { + elementInited[index] = true; + if (arrayElements[index] == null) { + arrayElements[index] = LazyBinaryFactory.createLazyBinaryObject( + ((LazyBinaryListObjectInspector)oi).getListElementObjectInspector()); + } + arrayElements[index].init(bytes, elementStart[index], elementLength[index]); + } + } + return arrayElements[index].getObject(); + } + + /** + * Returns the array size. + */ + public int getListLength() { + if (!parsed) { + parse(); + } + return arraySize; + } + + /** + * cachedList is reused every time getList is called. + * Different LazyBianryArray instances cannot share + * the same cachedList. + */ + ArrayList cachedList; + + /** Returns the List of actual primitive objects. + * Returns null for null array. + */ + public List getList() { + if (!parsed) { + parse(); + } + if (cachedList == null) { + cachedList = new ArrayList(arraySize); + } else { + cachedList.clear(); + } + for (int index=0; index + * Part of the code is adapted from Apache Harmony Project. + * + * As with the specification, this implementation relied on code laid out in Henry S. Warren, Jr.'s Hacker's + * Delight, (Addison Wesley, 2002) as well as The Aggregate's Magic Algorithms. + *

+ * + */ +public class LazyBinaryBoolean extends LazyBinaryPrimitive { + + public LazyBinaryBoolean(WritableBooleanObjectInspector oi) { + super(oi); + data = new BooleanWritable(); + } + + public LazyBinaryBoolean(LazyBinaryBoolean copy) { + super(copy); + data = new BooleanWritable(copy.data.get()); + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + assert(1 == length); + byte val = bytes.getData()[start]; + if (val == 0) { + data.set(false); + } else if (val == 1) { + data.set(true); + } + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java (revision 0) @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; + +public abstract class LazyBinaryNonPrimitive extends LazyBinaryObject { + + protected ByteArrayRef bytes; + protected int start; + protected int length; + + protected LazyBinaryNonPrimitive(OI oi) { + super(oi); + bytes = null; + start = 0; + length = 0; + } + + @Override + public Object getObject() { + return this; + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + if (null == bytes) { + throw new RuntimeException("bytes cannot be null!"); + } + if (length <= 0) { + throw new RuntimeException("length should be positive!"); + } + this.bytes = bytes; + this.start = start; + this.length = length; + } +} Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java =================================================================== --- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java (revision 0) +++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java (revision 0) @@ -0,0 +1,234 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.serde2.lazybinary; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo; +import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; + +/** + * LazyBinaryStruct is serialized as follows: + * start A B A B A B end + * bytes[] -> |-----|---------|--- ... ---|-----|---------| + * + * Section A is one null-byte, corresponding to eight struct fields in Section B. + * Each bit indicates whether the corresponding field is null (0) or not null (1). + * Each field is a LazyBinaryObject. + * + * Following B, there is another section A and B. This pattern repeats until the + * all struct fields are serialized. + */ +public class LazyBinaryStruct extends LazyBinaryNonPrimitive { + + private static Log LOG = LogFactory.getLog(LazyBinaryStruct.class.getName()); + + /** + * Whether the data is already parsed or not. + */ + boolean parsed; + + /** + * The fields of the struct. + */ + LazyBinaryObject[] fields; + + /** + * Whether a field is initialized or not. + */ + boolean[] fieldInited; + + /** + * Whether a field is null or not. + * Because length is 0 does not means the field is null. + * In particular, a 0-length string is not null. + */ + boolean[] fieldIsNull; + + /** + * The start positions and lengths of struct fields. + * Only valid when the data is parsed. + */ + int[] fieldStart; + int[] fieldLength; + + /** + * Construct a LazyBinaryStruct object with an ObjectInspector. + */ + protected LazyBinaryStruct(LazyBinaryStructObjectInspector oi) { + super(oi); + } + + @Override + public void init(ByteArrayRef bytes, int start, int length) { + super.init(bytes, start, length); + parsed = false; + } + + RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo(); + boolean missingFieldWarned = false; + boolean extraFieldWarned = false; + /** + * Parse the byte[] and fill fieldStart, fieldLength, + * fieldInited and fieldIsNull. + */ + private void parse() { + + List fieldRefs = ((StructObjectInspector)oi).getAllStructFieldRefs(); + + if (fields == null) { + fields = new LazyBinaryObject[fieldRefs.size()]; + for (int i = 0 ; i < fields.length; i++) { + fields[i] = LazyBinaryFactory.createLazyBinaryObject(fieldRefs.get(i).getFieldObjectInspector()); + } + fieldInited = new boolean[fields.length]; + fieldIsNull = new boolean[fields.length]; + fieldStart = new int[fields.length]; + fieldLength = new int[fields.length]; + } + + /** + * Please note that one null byte is followed by eight fields, + * then more null byte and fields. + */ + + int fieldId = 0; + int structByteEnd = start + length; + byte[] bytes = this.bytes.getData(); + + byte nullByte = bytes[start]; + int lastFieldByteEnd = start + 1; + // Go through all bytes in the byte[] + for (int i=0; i structByteEnd) { + missingFieldWarned = true; + LOG.warn("Missing fields! Expected " + fields.length + " fields but " + + "only got " + fieldId + "! Ignoring similar problems."); + } + + Arrays.fill(fieldInited, false); + parsed = true; + } + + /** + * Get one field out of the struct. + * + * If the field is a primitive field, return the actual object. + * Otherwise return the LazyObject. This is because PrimitiveObjectInspector + * does not have control over the object used by the user - the user simply + * directly use the Object instead of going through + * Object PrimitiveObjectInspector.get(Object). + * + * @param fieldID The field ID + * @return The field as a LazyObject + */ + public Object getField(int fieldID) { + if (!parsed) { + parse(); + } + return uncheckedGetField(fieldID); + } + + /** + * Get the field out of the row without checking parsed. + * This is called by both getField and getFieldsAsList. + * @param fieldID The id of the field starting from 0. + * @return The value of the field + */ + private Object uncheckedGetField(int fieldID) { + // Test the length first so in most cases we avoid doing a byte[] + // comparison. + if (fieldIsNull[fieldID]) { + return null; + } + if (!fieldInited[fieldID]) { + fieldInited[fieldID] = true; + fields[fieldID].init(bytes, fieldStart[fieldID], fieldLength[fieldID]); + } + return fields[fieldID].getObject(); + } + + ArrayList cachedList; + /** + * Get the values of the fields as an ArrayList. + * @return The values of the fields as an ArrayList. + */ + public ArrayList getFieldsAsList() { + if (!parsed) { + parse(); + } + if (cachedList == null) { + cachedList = new ArrayList(); + } else { + cachedList.clear(); + } + for (int i=0; i