Index: serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java
===================================================================
--- serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java (revision 799674)
+++ serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java (working copy)
@@ -47,7 +47,7 @@
return r;
}
- public String hexString(BytesWritable bytes) {
+ public static String hexString(BytesWritable bytes) {
StringBuilder sb = new StringBuilder();
for (int i=0; i getRandIntegerArray(Random r) {
+ public static List<Integer> getRandIntegerArray(Random r) {
int length = r.nextInt(10);
ArrayList<Integer> result = new ArrayList<Integer>(length);
for(int i=0; i myList;
+
+ public MyTestClass() {
+ }
+
+ public MyTestClass(Byte b, Short s, Integer i, Long l,
+ Float f, Double d, String st, MyTestInnerStruct is,
+ List<Integer> li) {
+ this.myByte = b;
+ this.myShort = s;
+ this.myInt = i;
+ this.myLong = l;
+ this.myFloat = f;
+ this.myDouble = d;
+ this.myString = st;
+ this.myStruct = is;
+ this.myList = li;
+ }
}
Index: serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java
===================================================================
--- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java (revision 0)
+++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java (revision 0)
@@ -0,0 +1,468 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass;
+import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct;
+import org.apache.hadoop.hive.serde2.binarysortable.TestBinarySortableSerDe;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
+import org.apache.hadoop.io.BytesWritable;
+
+import junit.framework.TestCase;
+
+public class TestLazyBinarySerDe extends TestCase {
+
+ /**
+ * Generate a random struct array
+ * @param r random number generator
+ * @return an struct array
+ */
+ static List<MyTestInnerStruct> getRandStructArray(Random r) {
+ int length = r.nextInt(10);
+ ArrayList<MyTestInnerStruct> result = new ArrayList<MyTestInnerStruct>(length);
+ for(int i=0; i fields1 = soi1.getAllStructFieldRefs();
+ List<? extends StructField> fields2 = soi2.getAllStructFieldRefs();
+ int minimum = Math.min(fields1.size(), fields2.size());
+ for (int i=0; i 0 ? null : Byte.valueOf((byte)r.nextInt());
+ Short s = randField > 1 ? null : Short.valueOf((short)r.nextInt());
+ Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
+ Long l = randField > 3 ? null : Long.valueOf(r.nextLong());
+ Float f = randField > 4 ? null : Float.valueOf(r.nextFloat());
+ Double d = randField > 5 ? null : Double.valueOf(r.nextDouble());
+ String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
+ MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
+ List<Integer> li = randField > 8 ? null: TestBinarySortableSerDe.getRandIntegerArray(r);
+ Map<String, List<MyTestInnerStruct>> mp = new HashMap<String, List<MyTestInnerStruct>>();
+ String key = TestBinarySortableSerDe.getRandString(r);
+ List<MyTestInnerStruct> value = randField > 10 ? null: getRandStructArray(r);
+ mp.put(key, value);
+ String key1 = TestBinarySortableSerDe.getRandString(r);
+ mp.put(key1, null);
+ String key2 = TestBinarySortableSerDe.getRandString(r);
+ List<MyTestInnerStruct> value2 = getRandStructArray(r);
+ mp.put(key2, value2);
+
+ MyTestClassBigger input = new MyTestClassBigger(b,s,n,l,f,d,st,is,li,mp);
+ BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);
+ Object output = serde2.deserialize(bw);
+
+ if(0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
+ System.out.println("structs = " + SerDeUtils.getJSONString(input, rowOI1));
+ System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
+ System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
+ assertEquals(input, output);
+ }
+ }
+ }
+
+ /**
+ * Test shorter schema deserialization where a bigger struct is serialized
+ * and it is then deserialized with a smaller struct.
+ * Here the serialized struct has 9 fields and we deserialized to a
+ * struct of 8 fields.
+ */
+ private void testShorterSchemaDeserialization1(Random r) throws Throwable{
+
+ StructObjectInspector rowOI1 = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClass.class,
+ ObjectInspectorOptions.JAVA);
+ String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
+ String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
+ SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
+ ObjectInspector serdeOI1 = serde1.getObjectInspector();
+
+ StructObjectInspector rowOI2 = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClassSmaller.class,
+ ObjectInspectorOptions.JAVA);
+ String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
+ String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
+ SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
+ ObjectInspector serdeOI2 = serde2.getObjectInspector();
+
+ int num = 100;
+ for (int itest=0; itest<num; itest++) {
+ int randField = r.nextInt(10);
+ Byte b = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
+ Short s = randField > 1 ? null : Short.valueOf((short)r.nextInt());
+ Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
+ Long l = randField > 3 ? null : Long.valueOf(r.nextLong());
+ Float f = randField > 4 ? null : Float.valueOf(r.nextFloat());
+ Double d = randField > 5 ? null : Double.valueOf(r.nextDouble());
+ String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
+ MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
+ List<Integer> li = randField > 8 ? null: TestBinarySortableSerDe.getRandIntegerArray(r);
+
+ MyTestClass input = new MyTestClass(b,s,n,l,f,d,st,is,li);
+ BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);
+ Object output = serde2.deserialize(bw);
+
+ if(0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
+ System.out.println("structs = " + SerDeUtils.getJSONString(input, rowOI1));
+ System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
+ System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
+ assertEquals(input, output);
+ }
+ }
+ }
+
+ /**
+ * Test longer schema deserialization where a smaller struct is serialized
+ * and it is then deserialized with a bigger struct
+ * Here the serialized struct has 9 fields and we deserialized to a
+ * struct of 10 fields.
+ */
+ void testLongerSchemaDeserialization(Random r) throws Throwable{
+
+ StructObjectInspector rowOI1 = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClass.class,
+ ObjectInspectorOptions.JAVA);
+ String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
+ String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
+ SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
+ ObjectInspector serdeOI1 = serde1.getObjectInspector();
+
+ StructObjectInspector rowOI2 = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClassBigger.class,
+ ObjectInspectorOptions.JAVA);
+ String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
+ String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
+ SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
+ ObjectInspector serdeOI2 = serde2.getObjectInspector();
+
+ int num = 100;
+ for (int itest=0; itest<num; itest++) {
+ int randField = r.nextInt(10);
+ Byte b = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
+ Short s = randField > 1 ? null : Short.valueOf((short)r.nextInt());
+ Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
+ Long l = randField > 3 ? null : Long.valueOf(r.nextLong());
+ Float f = randField > 4 ? null : Float.valueOf(r.nextFloat());
+ Double d = randField > 5 ? null : Double.valueOf(r.nextDouble());
+ String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
+ MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
+ List<Integer> li = randField > 8 ? null: TestBinarySortableSerDe.getRandIntegerArray(r);
+
+ MyTestClass input = new MyTestClass(b,s,n,l,f,d,st,is,li);
+ BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);
+ Object output = serde2.deserialize(bw);
+
+ if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
+ System.out.println("structs = " + SerDeUtils.getJSONString(input, rowOI1));
+ System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
+ System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
+ assertEquals(input, output);
+ }
+ }
+ }
+
+ /**
+ * Test longer schema deserialization where a smaller struct is serialized
+ * and it is then deserialized with a bigger struct
+ * Here the serialized struct has 8 fields and we deserialized to a
+ * struct of 9 fields.
+ */
+ void testLongerSchemaDeserialization1(Random r) throws Throwable{
+
+ StructObjectInspector rowOI1 = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClassSmaller.class,
+ ObjectInspectorOptions.JAVA);
+ String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
+ String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
+ SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
+ ObjectInspector serdeOI1 = serde1.getObjectInspector();
+
+ StructObjectInspector rowOI2 = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClass.class,
+ ObjectInspectorOptions.JAVA);
+ String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
+ String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
+ SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
+ ObjectInspector serdeOI2 = serde2.getObjectInspector();
+
+ int num = 100;
+ for (int itest=0; itest<num; itest++) {
+ int randField = r.nextInt(9);
+ Byte b = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
+ Short s = randField > 1 ? null : Short.valueOf((short)r.nextInt());
+ Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
+ Long l = randField > 3 ? null : Long.valueOf(r.nextLong());
+ Float f = randField > 4 ? null : Float.valueOf(r.nextFloat());
+ Double d = randField > 5 ? null : Double.valueOf(r.nextDouble());
+ String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
+ MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
+
+ MyTestClassSmaller input = new MyTestClassSmaller(b,s,n,l,f,d,st,is);
+ BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);
+ Object output = serde2.deserialize(bw);
+
+ if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
+ System.out.println("structs = " + SerDeUtils.getJSONString(input, rowOI1));
+ System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
+ System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
+ assertEquals(input, output);
+ }
+ }
+ }
+
+ void testLazyBinaryMap(Random r) throws Throwable {
+
+ StructObjectInspector rowOI = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClassBigger.class,
+ ObjectInspectorOptions.JAVA);
+ String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
+ String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
+ SerDe serde = getSerDe(fieldNames, fieldTypes);
+ ObjectInspector serdeOI = serde.getObjectInspector();
+
+ StructObjectInspector soi1 = (StructObjectInspector)serdeOI;
+ List<? extends StructField> fields1 = soi1.getAllStructFieldRefs();
+ LazyBinaryMapObjectInspector lazympoi = (LazyBinaryMapObjectInspector) fields1.get(9).getFieldObjectInspector();
+ ObjectInspector lazympkeyoi = lazympoi.getMapKeyObjectInspector();
+ ObjectInspector lazympvalueoi = lazympoi.getMapValueObjectInspector();
+
+ StructObjectInspector soi2 = (StructObjectInspector)rowOI;
+ List<? extends StructField> fields2 = soi2.getAllStructFieldRefs();
+ MapObjectInspector inputmpoi = (MapObjectInspector) fields2.get(9).getFieldObjectInspector();
+ ObjectInspector inputmpkeyoi = inputmpoi.getMapKeyObjectInspector();
+ ObjectInspector inputmpvalueoi = inputmpoi.getMapValueObjectInspector();
+
+ int num = 100;
+ for (int testi=0; testi<num; testi++) {
+ Map<String, List<MyTestInnerStruct>> mp = new LinkedHashMap<String, List<MyTestInnerStruct>>();
+
+ int randFields = r.nextInt(10);
+ for (int i=0; i<randFields; i++) {
+ String key = TestBinarySortableSerDe.getRandString(r);
+ int randField = r.nextInt(10);
+ List<MyTestInnerStruct> value = randField > 4 ? null: getRandStructArray(r);
+ mp.put(key, value);
+ }
+
+ MyTestClassBigger input = new MyTestClassBigger(null,null,null,null,null,null,null,null,null,mp);
+ BytesWritable bw = (BytesWritable) serde.serialize(input, rowOI);
+ Object output = serde.deserialize(bw);
+ Object lazyobj = soi1.getStructFieldData(output, fields1.get(9));
+ Map<?, ?> outputmp = lazympoi.getMap(lazyobj);
+
+ if (outputmp.size() != mp.size()) {
+ throw new RuntimeException("Map size changed from " + mp.size() + " to " + outputmp.size() + " after serialization!");
+ }
+
+ for (Map.Entry<?, ?> entryinput: mp.entrySet()) {
+ boolean bEqual = false;
+ for (Map.Entry<?, ?> entryoutput: outputmp.entrySet()) {
+ // find the same key
+ if (0 == ObjectInspectorUtils.compare(entryoutput.getKey(), lazympkeyoi, entryinput.getKey(), inputmpkeyoi)) {
+ if(0 != ObjectInspectorUtils.compare(entryoutput.getValue(), lazympvalueoi, entryinput.getValue(), inputmpvalueoi)) {
+ assertEquals(entryoutput.getValue(), entryinput.getValue());
+ } else {
+ bEqual = true;
+ }
+ break;
+ }
+ }
+ if(!bEqual)
+ throw new RuntimeException("Could not find matched key in deserialized map : " + entryinput.getKey());
+ }
+ }
+ }
+
+ /**
+ * The test entrance function
+ * @throws Throwable
+ */
+ public void testLazyBinarySerDe() throws Throwable {
+ try {
+
+ System.out.println("Beginning Test TestLazyBinarySerDe:");
+
+ // generate the data
+ int num = 1000;
+ Random r = new Random(1234);
+ MyTestClass rows[] = new MyTestClass[num];
+ for (int i=0; i<num; i++) {
+ int randField = r.nextInt(10);
+ Byte b = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
+ Short s = randField > 1 ? null : Short.valueOf((short)r.nextInt());
+ Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
+ Long l = randField > 3 ? null : Long.valueOf(r.nextLong());
+ Float f = randField > 4 ? null : Float.valueOf(r.nextFloat());
+ Double d = randField > 5 ? null : Double.valueOf(r.nextDouble());
+ String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
+ MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
+ List<Integer> li = randField > 8 ? null: TestBinarySortableSerDe.getRandIntegerArray(r);
+ MyTestClass t = new MyTestClass(b,s,n,l,f,d,st,is,li);
+ rows[i] = t;
+ }
+
+ StructObjectInspector rowOI = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClass.class,
+ ObjectInspectorOptions.JAVA);
+
+ String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
+ String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
+
+ // call the tests
+ // 1/ test LazyBinarySerDe
+ testLazyBinarySerDe(rows, rowOI, getSerDe(fieldNames, fieldTypes));
+ // 2/ test LazyBinaryMap
+ testLazyBinaryMap(r);
+ // 3/ test serialization and deserialization with different schemas
+ testShorterSchemaDeserialization(r);
+ // 4/ test serialization and deserialization with different schemas
+ testLongerSchemaDeserialization(r);
+ // 5/ test serialization and deserialization with different schemas
+ testShorterSchemaDeserialization1(r);
+ // 6/ test serialization and deserialization with different schemas
+ testLongerSchemaDeserialization1(r);
+
+ System.out.println("Test TestLazyBinarySerDe passed!");
+ } catch (Throwable e) {
+ e.printStackTrace();
+ throw e;
+ }
+ }
+}
Index: serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java
===================================================================
--- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java (revision 0)
+++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java (revision 0)
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct;
+
+public class MyTestClassBigger {
+ Byte myByte;
+ Short myShort;
+ Integer myInt;
+ Long myLong;
+ Float myFloat;
+ Double myDouble;
+ String myString;
+ MyTestInnerStruct myStruct;
+ List<Integer> myList;
+ Map<String, List<MyTestInnerStruct>> myMap;
+
+ public MyTestClassBigger() {
+ }
+
+ public MyTestClassBigger(Byte b, Short s, Integer i, Long l,
+ Float f, Double d, String st, MyTestInnerStruct is,
+ List<Integer> li, Map<String, List<MyTestInnerStruct>> mp) {
+ this.myByte = b;
+ this.myShort = s;
+ this.myInt = i;
+ this.myLong = l;
+ this.myFloat = f;
+ this.myDouble = d;
+ this.myString = st;
+ this.myStruct = is;
+ this.myList = li;
+ this.myMap = mp;
+ }
+}
Index: serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java
===================================================================
--- serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java (revision 0)
+++ serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java (revision 0)
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct;
+
+public class MyTestClassSmaller {
+ Byte myByte;
+ Short myShort;
+ Integer myInt;
+ Long myLong;
+ Float myFloat;
+ Double myDouble;
+ String myString;
+ MyTestInnerStruct myStruct;
+
+ public MyTestClassSmaller() {
+ }
+
+ public MyTestClassSmaller(Byte b, Short s, Integer i, Long l,
+ Float f, Double d, String st, MyTestInnerStruct is) {
+ this.myByte = b;
+ this.myShort = s;
+ this.myInt = i;
+ this.myLong = l;
+ this.myFloat = f;
+ this.myDouble = d;
+ this.myString = st;
+ this.myStruct = is;
+ }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java (revision 799674)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java (working copy)
@@ -226,7 +226,6 @@
/**
* Get the value object with the index without checking parsed.
* @param index The index into the array starting from 0
- * @param nullSequence The byte sequence representing the NULL value
*/
private LazyObject uncheckedGetValue(int index) {
Text nullSequence = oi.getNullSequence();
@@ -252,7 +251,6 @@
/**
* Get the key object with the index without checking parsed.
* @param index The index into the array starting from 0
- * @param nullSequence The byte sequence representing the NULL value
*/
private LazyPrimitive<?,?> uncheckedGetKey(int index) {
Text nullSequence = oi.getNullSequence();
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java (revision 799674)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java (working copy)
@@ -39,8 +39,8 @@
* SerDe classes should call the static functions in this library to create an ObjectInspector
* to return to the caller of SerDe2.getObjectInspector().
*
- * The reason of having caches here is that ObjectInspector is because ObjectInspectors do
- * not have an internal state - so ObjectInspectors with the same construction parameters should
+ * The reason of having caches here is that ObjectInspectors do not have an internal
+ * state - so ObjectInspectors with the same construction parameters should
* result in exactly the same ObjectInspector.
*/
public class LazyObjectInspectorFactory {
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java (revision 0)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java (revision 0)
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.io.IntWritable;
+
+/**
+ * LazyBinaryObject for integer which is serialized as VInt
+ * @see LazyBinaryUtils#readVInt(byte[], int, VInt)
+ */
+public class LazyBinaryInteger extends LazyBinaryPrimitive<WritableIntObjectInspector, IntWritable> {
+
+ LazyBinaryInteger(WritableIntObjectInspector oi) {
+ super(oi);
+ data = new IntWritable();
+ }
+
+ LazyBinaryInteger(LazyBinaryInteger copy) {
+ super(copy);
+ data = new IntWritable(copy.data.get());
+ }
+
+ /**
+ * The reusable vInt for decoding the integer
+ */
+ VInt vInt = new LazyBinaryUtils.VInt();
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ LazyBinaryUtils.readVInt(bytes.getData(), start, vInt);
+ assert(length == vInt.length);
+ data.set(vInt.value);
+ }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java (revision 0)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java (revision 0)
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+
+/**
+ * LazyBinaryObject for short which takes two bytes.
+ */
+public class LazyBinaryShort extends LazyBinaryPrimitive<WritableShortObjectInspector, ShortWritable> {
+
+ LazyBinaryShort(WritableShortObjectInspector oi) {
+ super(oi);
+ data = new ShortWritable();
+ }
+
+ LazyBinaryShort(LazyBinaryShort copy) {
+ super(copy);
+ data = new ShortWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ assert(2 == length);
+ data.set(LazyBinaryUtils.byteArrayToShort(bytes.getData(), start));
+ }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java (revision 0)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java (revision 0)
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VLong;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import org.apache.hadoop.io.LongWritable;
+
+/**
+ * LazyBinaryObject for long which stores as VLong.
+ * @see LazyBinaryUtils#readVLong(byte[], int, VLong)
+ */
+public class LazyBinaryLong extends LazyBinaryPrimitive<WritableLongObjectInspector, LongWritable> {
+
+ LazyBinaryLong(WritableLongObjectInspector oi) {
+ super(oi);
+ data = new LongWritable();
+ }
+
+ LazyBinaryLong(LazyBinaryLong copy) {
+ super(copy);
+ data = new LongWritable(copy.data.get());
+ }
+
+ /**
+ * The reusable vLong for decoding the long
+ */
+ VLong vLong = new LazyBinaryUtils.VLong();
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ LazyBinaryUtils.readVLong(bytes.getData(), start, vLong);
+ assert(length == vLong.length);
+ data.set(vLong.value);
+ }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java (revision 0)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java (revision 0)
@@ -0,0 +1,344 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.WritableUtils;
+
+public class LazyBinaryUtils {
+
+ /**
+ * Convert the byte array to an int starting from the given offset.
+ * Refer to code by aeden on DZone Snippets:
+ * @param b the byte array
+ * @param offset the array offset
+ * @return the integer
+ */
+ public static int byteArrayToInt(byte[] b, int offset) {
+ int value = 0;
+ for (int i = 0; i < 4; i++) {
+ int shift = (4 - 1 - i) * 8;
+ value += (b[i + offset] & 0x000000FF) << shift;
+ }
+ return value;
+ }
+
+ /**
+ * Convert the byte array to a long starting from the given offset.
+ * @param b the byte array
+ * @param offset the array offset
+ * @return the long
+ */
+ public static long byteArrayToLong(byte[] b, int offset) {
+ long value = 0;
+ for (int i = 0; i < 8; i++) {
+ int shift = (8 - 1 - i) * 8;
+ value += ((long) (b[i + offset] & 0x00000000000000FF)) << shift;
+ }
+ return value;
+ }
+
+ /**
+ * Convert the byte array to a short starting from the given offset.
+ * @param b the byte array
+ * @param offset the array offset
+ * @return the short
+ */
+ public static short byteArrayToShort(byte[] b, int offset) {
+ short value = 0;
+ value += (b[offset ] & 0x000000FF) << 8;
+ value += (b[offset+1] & 0x000000FF);
+ return value;
+ }
+
  /**
   * Record is the unit that data is serialized in.
   * A record includes two parts. The first part stores the
   * size of the element and the second part stores the
   * real element.
   *          size   element
   * record -> |----|-------------------------|
   *
   * A RecordInfo stores two pieces of information about a record:
   * the size of the "size" part, which is the element offset,
   * and the size of the element part, which is the element size.
   */
  public static class RecordInfo {
    public RecordInfo () {
      elementOffset = 0;
      elementSize = 0;
    }
    // Number of bytes occupied by the "size" header (0 for fixed-size types).
    public byte elementOffset;
    // Number of bytes occupied by the element itself.
    public int elementSize;
  }
+
  // Shared scratch object for decoding string-length VInts.
  // NOTE(review): this mutable static is written by the static method below,
  // so concurrent calls from different threads would corrupt each other's
  // decode results -- confirm callers are single-threaded per process.
  static VInt vInt = new LazyBinaryUtils.VInt();
  /**
   * Check a particular field and set its size and offset in bytes
   * based on the field type and the bytes arrays.
   *
   * For void, boolean, byte, short, float and double, there is no offset
   * and the size is fixed. For int and long (zero-compressed vint/vlong),
   * the size is derived from the first encoded byte. For string, the
   * length is stored as a vint immediately before the data, so the offset
   * is the vint's width. For map, list and struct, the first four bytes
   * store the element's byte size, so the offset is 4.
   *
   * @param objectInspector object inspector of the field
   * @param bytes bytes arrays store the table row
   * @param offset offset of this field
   * @param recordInfo modify this byteinfo object and return it
   */
  public static void checkObjectByteInfo(ObjectInspector objectInspector, byte[] bytes, int offset, RecordInfo recordInfo) {
    Category category = objectInspector.getCategory();
    switch (category) {
    case PRIMITIVE:
      PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector)objectInspector).getPrimitiveCategory();
      switch (primitiveCategory) {
      case VOID:
      case BOOLEAN:
      case BYTE:
        // Fixed single-byte representation.
        recordInfo.elementOffset = 0;
        recordInfo.elementSize = 1;
        break;
      case SHORT:
        recordInfo.elementOffset = 0;
        recordInfo.elementSize = 2;
        break;
      case FLOAT:
        recordInfo.elementOffset = 0;
        recordInfo.elementSize = 4;
        break;
      case DOUBLE:
        recordInfo.elementOffset = 0;
        recordInfo.elementSize = 8;
        break;
      case INT:
        // VInt-encoded: total size is determined by the first byte.
        recordInfo.elementOffset = 0;
        recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
        break;
      case LONG:
        // VLong-encoded: total size is determined by the first byte.
        recordInfo.elementOffset = 0;
        recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
        break;
      case STRING:
        // using vint instead of 4 bytes
        LazyBinaryUtils.readVInt(bytes, offset, vInt);
        recordInfo.elementOffset = vInt.length;
        recordInfo.elementSize = vInt.value;
        break;
      default: {
        throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory);
      }
      }
      break;
    case LIST:
    case MAP:
    case STRUCT:
      // Complex types store their total byte size in the first four bytes.
      recordInfo.elementOffset = 4;
      recordInfo.elementSize = LazyBinaryUtils.byteArrayToInt(bytes, offset);
      break;
    default : {
      throw new RuntimeException("Unrecognized non-primitive type: " + category);
    }
    }
  }
+
  /**
   * A zero-compressed encoded long, holding both the decoded value and the
   * number of bytes the encoding occupied. Reused across decodes to avoid
   * per-call allocation.
   * @see WritableUtils#readVLong(java.io.DataInput)
   */
  public static class VLong {
    public VLong() {
      value = 0;
      length = 0;
    }
    // The decoded long value.
    public long value;
    // The number of bytes the encoded form occupies.
    public byte length;
  };
+
+ /**
+ * Reads a zero-compressed encoded long from a byte array and returns it.
+ * @param bytes the byte array
+ * @param offset offset of the array to read from
+ * @param vlong storing the deserialized long and its size in byte
+ * @see WritableUtils#readVLong(java.io.DataInput)
+ */
+ public static void readVLong(byte[] bytes, int offset, VLong vlong) {
+ byte firstByte = bytes[offset];
+ vlong.length = (byte)WritableUtils.decodeVIntSize(firstByte);
+ if (vlong.length == 1) {
+ vlong.value = firstByte;
+ return;
+ }
+ long i = 0;
+ for (int idx = 0; idx < vlong.length-1; idx++) {
+ byte b = bytes[offset+1+idx];
+ i = i << 8;
+ i = i | (b & 0xFF);
+ }
+ vlong.value = (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1L) : i);
+ }
+
  /**
   * A zero-compressed encoded integer, holding both the decoded value and
   * the number of bytes the encoding occupied. Reused across decodes to
   * avoid per-call allocation.
   * @see WritableUtils#readVInt(java.io.DataInput)
   */
  public static class VInt {
    public VInt() {
      value = 0;
      length = 0;
    }
    // The decoded int value.
    public int value;
    // The number of bytes the encoded form occupies.
    public byte length;
  };
+
+ /**
+ * Reads a zero-compressed encoded int from a byte array and returns it.
+ * @param bytes the byte array
+ * @param offset offset of the array to read from
+ * @param vint storing the deserialized int and its size in byte
+ * @see WritableUtils#readVInt(java.io.DataInput)
+ */
+ public static void readVInt(byte[] bytes, int offset, VInt vInt) {
+ byte firstByte = bytes[offset];
+ vInt.length = (byte)WritableUtils.decodeVIntSize(firstByte);
+ if (vInt.length == 1) {
+ vInt.value = firstByte;
+ return;
+ }
+ int i = 0;
+ for (int idx = 0; idx < vInt.length-1; idx++) {
+ byte b = bytes[offset+1+idx];
+ i = i << 8;
+ i = i | (b & 0xFF);
+ }
+ vInt.value = (WritableUtils.isNegativeVInt(firstByte) ? (i ^ -1) : i);
+ }
+
  /**
   * Writes a zero-compressed encoded int to a byte array.
   * Delegates to {@link #writeVLong(Output, long)}: the int and long
   * encodings are identical, so no separate implementation is needed.
   * @param byteStream the byte array/stream
   * @param i the int
   * @see LazyBinaryUtils#writeVLong(Output, long)
   */
  public static void writeVInt(Output byteStream, int i) {
    writeVLong(byteStream, i);
  }
+
  /**
   * Write a zero-compressed encoded long to a byte array.
   * Values in [-112, 127] take one byte; larger magnitudes are written as a
   * one-byte header (encoding sign and byte count) followed by the
   * magnitude's bytes in big-endian order.
   * @param byteStream the byte array/stream
   * @param l the long
   * @see WritableUtils#writeVLong(java.io.DataOutput, long)
   */
  public static void writeVLong(Output byteStream, long l) {
    // Small values fit in a single byte with no header.
    if (l >= -112 && l <= 127) {
      byteStream.write((byte)l);
      return;
    }

    int len = -112;
    if (l < 0) {
      l ^= -1L; // take one's complement so the loop below counts magnitude bytes
      len = -120;
    }

    // Decrement len once per non-zero byte; len encodes sign and length.
    long tmp = l;
    while (tmp != 0) {
      tmp = tmp >> 8;
      len--;
    }

    byteStream.write((byte)len);

    // Recover the positive byte count from the header value.
    len = (len < -120) ? -(len + 120) : -(len + 112);

    // Emit the magnitude big-endian, most significant byte first.
    for (int idx = len; idx != 0; idx--) {
      int shiftbits = (idx - 1) * 8;
      long mask = 0xFFL << shiftbits;
      byteStream.write((byte)((l & mask) >> shiftbits));
    }
  }
+
+ static HashMap cachedLazyBinaryObjectInspector = new HashMap();
+
+ /**
+ * Returns the lazy binary object inspector that can be used to inspect an
+ * lazy binary object of that typeInfo
+ *
+ * For primitive types, we use the standard writable object inspector.
+ */
+ public static ObjectInspector getLazyBinaryObjectInspectorFromTypeInfo(TypeInfo typeInfo) {
+ ObjectInspector result = cachedLazyBinaryObjectInspector.get(typeInfo);
+ if (result == null) {
+ switch(typeInfo.getCategory()) {
+ case PRIMITIVE: {
+ result = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+ ((PrimitiveTypeInfo)typeInfo).getPrimitiveCategory());
+ break;
+ }
+ case LIST: {
+ ObjectInspector elementObjectInspector = getLazyBinaryObjectInspectorFromTypeInfo(
+ ((ListTypeInfo)typeInfo).getListElementTypeInfo());
+ result = LazyBinaryObjectInspectorFactory.getLazyBinaryListObjectInspector(elementObjectInspector);
+ break;
+ }
+ case MAP: {
+ MapTypeInfo mapTypeInfo = (MapTypeInfo)typeInfo;
+ ObjectInspector keyObjectInspector = getLazyBinaryObjectInspectorFromTypeInfo(mapTypeInfo.getMapKeyTypeInfo());
+ ObjectInspector valueObjectInspector = getLazyBinaryObjectInspectorFromTypeInfo(mapTypeInfo.getMapValueTypeInfo());
+ result = LazyBinaryObjectInspectorFactory.getLazyBinaryMapObjectInspector(keyObjectInspector, valueObjectInspector);
+ break;
+ }
+ case STRUCT: {
+ StructTypeInfo structTypeInfo = (StructTypeInfo)typeInfo;
+ List fieldNames = structTypeInfo.getAllStructFieldNames();
+ List fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
+ List fieldObjectInspectors = new ArrayList(fieldTypeInfos.size());
+ for(int i=0; i columnNames;
+ List columnTypes;
+
+ TypeInfo rowTypeInfo;
+ ObjectInspector cachedObjectInspector;
+
+ // The object for storing row data
+ LazyBinaryStruct cachedLazyBinaryStruct;
+
+ /**
+ * Initialize the SerDe with configuration and table information
+ * @see SerDe#initialize(Configuration, Properties)
+ */
+ @Override
+ public void initialize(Configuration conf, Properties tbl)
+ throws SerDeException {
+ // Get column names and types
+ String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
+ String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
+ if (columnNameProperty.length() == 0) {
+ columnNames = new ArrayList();
+ } else {
+ columnNames = Arrays.asList(columnNameProperty.split(","));
+ }
+ if (columnTypeProperty.length() == 0) {
+ columnTypes = new ArrayList();
+ } else {
+ columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+ }
+ assert(columnNames.size() == columnTypes.size());
+ // Create row related objects
+ rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+ // Create the object inspector and the lazy binary struct object
+ cachedObjectInspector = LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(rowTypeInfo);
+ cachedLazyBinaryStruct = (LazyBinaryStruct) LazyBinaryFactory
+ .createLazyBinaryObject(cachedObjectInspector);
+ // output debug info
+ LOG.debug("LazyBinarySerDe initialized with: columnNames="
+ + columnNames + " columnTypes=" + columnTypes);
+ }
+
  /**
   * Returns the ObjectInspector for the row, built once in initialize()
   * from the table's column names and types.
   * @see Deserializer#getObjectInspector()
   */
  @Override
  public ObjectInspector getObjectInspector() throws SerDeException {
    return cachedObjectInspector;
  }
+
+ /**
+ * Returns the Writable Class after serialization.
+ * @see Serializer#getSerializedClass()
+ */
+ @Override
+ public Class extends Writable> getSerializedClass() {
+ return BytesWritable.class;
+ }
+
  // Reusable wrapper around the raw byte[] of the current row; lets the
  // lazy struct drop the byte[] reference with a single assignment.
  ByteArrayRef byteArrayRef;

  /**
   * Deserialize a table record to a lazybinary struct.
   * Accepts either a BytesWritable or a Text payload; an empty record
   * deserializes to null. The same cached struct instance is returned for
   * every row, so the result is only valid until the next call.
   * @see Deserializer#deserialize(Writable)
   */
  @Override
  public Object deserialize(Writable field) throws SerDeException {
    if (byteArrayRef == null) {
      byteArrayRef = new ByteArrayRef();
    }
    if (field instanceof BytesWritable) {
      BytesWritable b = (BytesWritable)field;
      if(b.getSize()==0)
        return null;
      // For backward-compatibility with hadoop 0.17
      byteArrayRef.setData(b.get());
      cachedLazyBinaryStruct.init(byteArrayRef, 0, b.getSize());
    } else if (field instanceof Text) {
      Text t = (Text)field;
      if(t.getLength()==0)
        return null;
      // Text's backing array may be longer than the payload; pass the
      // explicit length rather than relying on the array size.
      byteArrayRef.setData(t.getBytes());
      cachedLazyBinaryStruct.init(byteArrayRef, 0, t.getLength());
    } else {
      throw new SerDeException(getClass().toString()
          + ": expects either BytesWritable or Text object!");
    }
    return cachedLazyBinaryStruct;
  }
+
  /**
   * The reusable output buffer and serialize byte buffer.
   * Shared across calls, so the Writable returned by serialize() is only
   * valid until the next call on this SerDe instance.
   */
  BytesWritable serializeBytesWritable = new BytesWritable();
  ByteStream.Output serializeByteStream = new ByteStream.Output();

  /**
   * Serialize an object to a byte buffer in a binary compact way.
   * The row must be a struct; its fields are written by serializeStruct()
   * into the shared byte stream and wrapped in the shared BytesWritable.
   * @see Serializer#serialize(Object, ObjectInspector)
   */
  @Override
  public Writable serialize(Object obj, ObjectInspector objInspector)
      throws SerDeException {
    // make sure it is a struct record
    if (objInspector.getCategory() != Category.STRUCT) {
      throw new SerDeException(getClass().toString()
          + " can only serialize struct types, but we got: "
          + objInspector.getTypeName());
    }

    serializeByteStream.reset();
    // serialize the row as a struct
    serializeStruct(serializeByteStream, obj,
        (StructObjectInspector) objInspector);
    // return the serialized bytes
    serializeBytesWritable.set(serializeByteStream.getData(),
        0, serializeByteStream.getCount());
    return serializeBytesWritable;
  }
+
+ boolean nullMapKey = false;
+
+ /**
+ * Serialize a struct object without writing the byte size.
+ * This function is shared by both row serialization and
+ * struct serialization.
+ *
+ * @param byteStream the byte stream storing the serialization data
+ * @param obj the struct object to serialize
+ * @param objInspector the struct object inspector
+ */
+ private void serializeStruct(Output byteStream,
+ Object obj, StructObjectInspector soi) {
+ // do nothing for null struct
+ if(null == obj)
+ return;
+ /*
+ * Interleave serializing one null byte and 8 struct fields
+ * in each round, in order to support data deserialization
+ * with different table schemas
+ */
+ List extends StructField> fields = soi.getAllStructFieldRefs();
+ int size = fields.size();
+ int lasti = 0;
+ byte nullByte = 0;
+ for (int i=0; i> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case INT: {
+ IntObjectInspector ioi = (IntObjectInspector)poi;
+ int v = ioi.get(obj);
+ LazyBinaryUtils.writeVInt(byteStream, v);
+ return;
+ }
+ case LONG: {
+ LongObjectInspector loi = (LongObjectInspector)poi;
+ long v = loi.get(obj);
+ LazyBinaryUtils.writeVLong(byteStream, v);
+ return;
+ }
+ case FLOAT: {
+ FloatObjectInspector foi = (FloatObjectInspector)poi;
+ int v = Float.floatToIntBits(foi.get(obj));
+ byteStream.write((byte) (v >> 24));
+ byteStream.write((byte) (v >> 16));
+ byteStream.write((byte) (v >> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case DOUBLE: {
+ DoubleObjectInspector doi = (DoubleObjectInspector)poi;
+ long v = Double.doubleToLongBits(doi.get(obj));
+ byteStream.write((byte) (v >> 56));
+ byteStream.write((byte) (v >> 48));
+ byteStream.write((byte) (v >> 40));
+ byteStream.write((byte) (v >> 32));
+ byteStream.write((byte) (v >> 24));
+ byteStream.write((byte) (v >> 16));
+ byteStream.write((byte) (v >> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case STRING: {
+ StringObjectInspector soi = (StringObjectInspector)poi;
+ Text t = soi.getPrimitiveWritableObject(obj);
+ /* write byte size of the string which is a vint */
+ int length = t.getLength();
+ LazyBinaryUtils.writeVInt(byteStream, length);
+ /* write string itself */
+ byte[] data = t.getBytes();
+ byteStream.write(data, 0, length);
+ return;
+ }
+ default: {
+ throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
+ }
+ }
+ }
+ case LIST: {
+ ListObjectInspector loi = (ListObjectInspector)objInspector;
+ ObjectInspector eoi = loi.getListElementObjectInspector();
+
+ // 1/ reserve spaces for the byte size of the list
+ // which is a integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int listStart = byteStream.getCount();
+
+ // 2/ write the size of the list as a VInt
+ int size = loi.getListLength(obj);
+ LazyBinaryUtils.writeVInt(byteStream, size);
+
+ // 3/ write the null bytes
+ byte nullByte = 0;
+ for (int eid = 0; eid < size; eid++) {
+ // set the bit to 1 if an element is not null
+ if (null != loi.getListElement(obj, eid)) {
+ nullByte |= 1 << (eid%8);
+ }
+ // store the byte every eight elements or
+ // if this is the last element
+ if (7 == eid%8 || eid == size-1) {
+ byteStream.write(nullByte);
+ nullByte = 0;
+ }
+ }
+
+ // 4/ write element by element from the list
+ for (int eid = 0; eid < size; eid++) {
+ serialize(byteStream, loi.getListElement(obj, eid), eoi);
+ }
+
+ // 5/ update the list byte size
+ int listEnd = byteStream.getCount();
+ int listSize = listEnd - listStart;
+ byte [] bytes = byteStream.getData();
+ bytes[byteSizeStart ] = (byte) (listSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (listSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (listSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (listSize);
+
+ return;
+ }
+ case MAP: {
+ MapObjectInspector moi = (MapObjectInspector)objInspector;
+ ObjectInspector koi = moi.getMapKeyObjectInspector();
+ ObjectInspector voi = moi.getMapValueObjectInspector();
+ Map, ?> map = moi.getMap(obj);
+
+ // 1/ reserve spaces for the byte size of the map
+ // which is a integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int mapStart = byteStream.getCount();
+
+ // 2/ write the size of the map which is a VInt
+ int size = map.size();
+ LazyBinaryUtils.writeVInt(byteStream, size);
+
+ // 3/ write the null bytes
+ int b = 0;
+ byte nullByte = 0;
+ for (Map.Entry, ?> entry: map.entrySet()) {
+ // set the bit to 1 if a key is not null
+ if (null != entry.getKey()) {
+ nullByte |= 1 << (b%8);
+ } else if (!nullMapKey) {
+ nullMapKey = true;
+ LOG.warn("Null map key encountered! Ignoring similar problems.");
+ }
+ b++;
+ // set the bit to 1 if a value is not null
+ if (null != entry.getValue()) {
+ nullByte |= 1 << (b%8);
+ }
+ b++;
+ // write the byte to stream every 4 key-value pairs
+ // or if this is the last key-value pair
+ if (0 == b%8 || b == size*2) {
+ byteStream.write(nullByte);
+ nullByte = 0;
+ }
+ }
+
+ // 4/ write key-value pairs one by one
+ for(Map.Entry, ?> entry: map.entrySet()) {
+ serialize(byteStream, entry.getKey(), koi);
+ serialize(byteStream, entry.getValue(), voi);
+ }
+
+ // 5/ update the byte size of the map
+ int mapEnd = byteStream.getCount();
+ int mapSize = mapEnd - mapStart;
+ byte [] bytes = byteStream.getData();
+ bytes[byteSizeStart ] = (byte) (mapSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (mapSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (mapSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (mapSize);
+
+ return;
+ }
+ case STRUCT: {
+ // 1/ reserve spaces for the byte size of the struct
+ // which is a integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int structStart = byteStream.getCount();
+
+ // 2/ serialize the struct
+ serializeStruct(byteStream, obj, (StructObjectInspector) objInspector);
+
+ // 3/ update the byte size of the struct
+ int structEnd = byteStream.getCount();
+ int structSize = structEnd - structStart;
+ byte [] bytes = byteStream.getData();
+ bytes[byteSizeStart ] = (byte) (structSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (structSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (structSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (structSize);
+
+ return;
+ }
+ default: {
+ throw new RuntimeException("Unrecognized type: " + objInspector.getCategory());
+ }
+ }
+ }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java (revision 0)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java (revision 0)
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+
+/**
+ * LazyBinaryObject for byte which takes one byte
+ */
+public class LazyBinaryByte extends LazyBinaryPrimitive {
+
+ LazyBinaryByte(WritableByteObjectInspector oi) {
+ super(oi);
+ data = new ByteWritable();
+ }
+
+ LazyBinaryByte(LazyBinaryByte copy) {
+ super(copy);
+ data = new ByteWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ assert(1 == length);
+ data.set(bytes.getData()[start]);
+ }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java (revision 0)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java (revision 0)
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
+import org.apache.hadoop.io.FloatWritable;
+
+/**
+ * LazyBinaryObject for float which takes four bytes.
+ */
+public class LazyBinaryFloat extends LazyBinaryPrimitive {
+
+ LazyBinaryFloat(WritableFloatObjectInspector oi) {
+ super(oi);
+ data = new FloatWritable();
+ }
+
+ LazyBinaryFloat(LazyBinaryFloat copy) {
+ super(copy);
+ data = new FloatWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ assert (4 == length);
+ data.set(Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes.getData(), start)));
+ }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java (revision 0)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java (revision 0)
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+ * LazyBinaryObject stores an object in a binary format in a byte[].
+ * For example, a double takes four bytes.
+ *
+ * A LazyBinaryObject can represent any primitive object or hierarchical object
+ * like string, list, map or struct.
+ */
+public abstract class LazyBinaryObject {
+
+ OI oi;
+
+ /**
+ * Create a LazyBinaryObject.
+ * @param oi Derived classes can access meta information about this Lazy
+ * Binary Object (e.g, length, null-bits) from it.
+ */
+ protected LazyBinaryObject(OI oi) {
+ this.oi = oi;
+ }
+
+ /**
+ * Set the data for this LazyBinaryObject.
+ * We take ByteArrayRef instead of byte[] so that we will be able to drop
+ * the reference to byte[] by a single assignment.
+ * The ByteArrayRef object can be reused across multiple rows.
+ *
+ * Never call this function if the object represent a null!!!
+ *
+ * @param bytes The wrapper of the byte[].
+ * @param start The start position inside the bytes.
+ * @param length The length of the data, starting from "start"
+ * @see ByteArrayRef
+ */
+ public abstract void init(ByteArrayRef bytes, int start, int length);
+
+ /**
+ * If the LazyBinaryObject is a primitive Object, then deserialize it and return
+ * the actual primitive Object.
+ * Otherwise (string, list, map, struct), return this.
+ */
+ public abstract Object getObject();
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java (revision 0)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java (revision 0)
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+
+/**
+ * The serialization of LazyBinaryString is very simple:
+ * start A end
+ * bytes[] -> |---------------------------------|
+ *
+ * Section A is just an array of bytes which are exactly
+ * the Text contained in this object.
+ *
+ */
+public class LazyBinaryString extends LazyBinaryPrimitive {
+
+ LazyBinaryString(WritableStringObjectInspector OI) {
+ super(OI);
+ data = new Text();
+ }
+
+ public LazyBinaryString(LazyBinaryString copy) {
+ super(copy);
+ data = new Text(copy.data);
+ }
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ assert(length > -1);
+ data.set(bytes.getData(), start, length);
+ }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java (revision 0)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java (revision 0)
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+
+public class LazyBinaryFactory {
+
+ /**
+ * Create a lazy binary primitive class given the type name.
+ */
+ public static LazyBinaryPrimitive,?> createLazyBinaryPrimitiveClass(PrimitiveObjectInspector oi) {
+ PrimitiveCategory p = oi.getPrimitiveCategory();
+ switch(p) {
+ case BOOLEAN: {
+ return new LazyBinaryBoolean((WritableBooleanObjectInspector)oi);
+ }
+ case BYTE: {
+ return new LazyBinaryByte((WritableByteObjectInspector)oi);
+ }
+ case SHORT: {
+ return new LazyBinaryShort((WritableShortObjectInspector)oi);
+ }
+ case INT: {
+ return new LazyBinaryInteger((WritableIntObjectInspector)oi);
+ }
+ case LONG: {
+ return new LazyBinaryLong((WritableLongObjectInspector)oi);
+ }
+ case FLOAT: {
+ return new LazyBinaryFloat((WritableFloatObjectInspector)oi);
+ }
+ case DOUBLE: {
+ return new LazyBinaryDouble((WritableDoubleObjectInspector)oi);
+ }
+ case STRING: {
+ return new LazyBinaryString((WritableStringObjectInspector)oi);
+ }
+ default: {
+ throw new RuntimeException("Internal error: no LazyBinaryObject for " + p);
+ }
+ }
+ }
+
+ /**
+ * Create a hierarchical LazyBinaryObject based on the given typeInfo.
+ */
+ public static LazyBinaryObject createLazyBinaryObject(ObjectInspector oi) {
+ ObjectInspector.Category c = oi.getCategory();
+ switch(c) {
+ case PRIMITIVE:
+ return createLazyBinaryPrimitiveClass((PrimitiveObjectInspector)oi);
+ case MAP:
+ return new LazyBinaryMap((LazyBinaryMapObjectInspector)oi);
+ case LIST:
+ return new LazyBinaryArray((LazyBinaryListObjectInspector)oi);
+ case STRUCT:
+ return new LazyBinaryStruct((LazyBinaryStructObjectInspector)oi);
+ }
+
+ throw new RuntimeException("Hive LazyBinarySerDe Internal error.");
+ }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryMapObjectInspector.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryMapObjectInspector.java (revision 0)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryMapObjectInspector.java (revision 0)
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary.objectinspector;
+
+import java.util.Map;
+
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector;
+
+/**
+ * ObjectInspector for LazyBinaryMap
+ * @see LazyBinaryMap
+ */
+public class LazyBinaryMapObjectInspector extends StandardMapObjectInspector {
+
+ protected LazyBinaryMapObjectInspector(ObjectInspector mapKeyObjectInspector,
+ ObjectInspector mapValueObjectInspector) {
+ super(mapKeyObjectInspector, mapValueObjectInspector);
+ }
+
+ @Override
+ public Map, ?> getMap(Object data) {
+ if (data == null) {
+ return null;
+ }
+ return ((LazyBinaryMap)data).getMap();
+ }
+
+ @Override
+ public int getMapSize(Object data) {
+ if (data == null) {
+ return -1;
+ }
+ return ((LazyBinaryMap)data).getMapSize();
+ }
+
+ @Override
+ public Object getMapValueElement(Object data, Object key) {
+ if (data == null) {
+ return -1;
+ }
+ return ((LazyBinaryMap)data).getMapValueElement(key);
+ }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryStructObjectInspector.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryStructObjectInspector.java (revision 0)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryStructObjectInspector.java (revision 0)
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary.objectinspector;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+
+/**
+ * ObjectInspector for LazyBinaryStruct
+ * @see LazyBinaryStruct
+ */
+public class LazyBinaryStructObjectInspector extends StandardStructObjectInspector {
+
+ protected LazyBinaryStructObjectInspector(List structFieldNames, List structFieldObjectInspectors) {
+ super(structFieldNames, structFieldObjectInspectors);
+ }
+
+ protected LazyBinaryStructObjectInspector(List fields) {
+ super(fields);
+ }
+
+ @Override
+ public Object getStructFieldData(Object data, StructField fieldRef) {
+ if (data == null) {
+ return null;
+ }
+ LazyBinaryStruct struct = (LazyBinaryStruct)data;
+ MyField f = (MyField) fieldRef;
+
+ int fieldID = f.getFieldID();
+ assert(fieldID >= 0 && fieldID < fields.size());
+
+ return struct.getField(fieldID);
+ }
+
+ @Override
+ public List
+ *
+ */
+public class LazyBinaryBoolean extends LazyBinaryPrimitive {
+
+ public LazyBinaryBoolean(WritableBooleanObjectInspector oi) {
+ super(oi);
+ data = new BooleanWritable();
+ }
+
+ public LazyBinaryBoolean(LazyBinaryBoolean copy) {
+ super(copy);
+ data = new BooleanWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ assert(1 == length);
+ byte val = bytes.getData()[start];
+ if (val == 0) {
+ data.set(false);
+ } else if (val == 1) {
+ data.set(true);
+ }
+ }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java (revision 0)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java (revision 0)
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+public abstract class LazyBinaryNonPrimitive extends LazyBinaryObject {
+
+ protected ByteArrayRef bytes;
+ protected int start;
+ protected int length;
+
+ protected LazyBinaryNonPrimitive(OI oi) {
+ super(oi);
+ bytes = null;
+ start = 0;
+ length = 0;
+ }
+
+ @Override
+ public Object getObject() {
+ return this;
+ }
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ if (null == bytes) {
+ throw new RuntimeException("bytes cannot be null!");
+ }
+ if (length <= 0) {
+ throw new RuntimeException("length should be positive!");
+ }
+ this.bytes = bytes;
+ this.start = start;
+ this.length = length;
+ }
+}
Index: serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java
===================================================================
--- serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java (revision 0)
+++ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java (revision 0)
@@ -0,0 +1,231 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+/**
+ * LazyBinaryStruct is serialized as follows:
+ * start A B A B A B end
+ * bytes[] -> |-----|---------|--- ... ---|-----|---------|
+ *
+ * Section A is one null-byte, corresponding to eight struct fields in Section B.
+ * Each bit indicates whether the corresponding field is null (0) or not null (1).
+ * Each field is a LazyBinaryObject.
+ *
+ * Following B, there is another section A and B. This pattern repeats until the
+ * all struct fields are serialized.
+ */
+public class LazyBinaryStruct extends LazyBinaryNonPrimitive {
+
+ private static Log LOG = LogFactory.getLog(LazyBinaryStruct.class.getName());
+
+ /**
+ * Whether the data is already parsed or not.
+ */
+ boolean parsed;
+
+ /**
+ * The fields of the struct.
+ */
+ LazyBinaryObject[] fields;
+
+ /**
+ * Whether a field is initialized or not.
+ */
+ boolean[] fieldInited;
+
+ /**
+ * Whether a field is null or not.
+ * Because length is 0 does not means the field is null.
+ * In particular, a 0-length string is not null.
+ */
+ boolean[] fieldIsNull;
+
+ /**
+ * The start positions and lengths of struct fields.
+ * Only valid when the data is parsed.
+ */
+ int[] fieldStart;
+ int[] fieldLength;
+
+ /**
+ * Construct a LazyBinaryStruct object with an ObjectInspector.
+ */
+ protected LazyBinaryStruct(LazyBinaryStructObjectInspector oi) {
+ super(oi);
+ }
+
+ // Re-points this struct at a new byte window and invalidates any previous
+ // parse; fields are re-discovered on the next getField() call.
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ super.init(bytes, start, length);
+ parsed = false;
+ }
+
+ // Scratch object reused across parse() calls to avoid per-field allocation.
+ RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();
+ // One-shot warning flags so the log warnings below fire at most once
+ // per instance.
+ boolean missingFieldWarned = false;
+ boolean extraFieldWarned = false;
+ /**
+ * Parse the byte[] and fill fieldStart, fieldLength,
+ * fieldInited and fieldIsNull.
+ */
+ private void parse() {
+
+ // NOTE(review): the declared type below reads "List extends StructField>";
+ // this copy of the patch has generic brackets stripped -- presumably
+ // "List<? extends StructField>". Verify against the committed file.
+ List extends StructField> fieldRefs = ((StructObjectInspector)oi).getAllStructFieldRefs();
+
+ // Lazily build one LazyBinaryObject per struct field on first parse;
+ // reused (re-inited) on subsequent rows.
+ if (fields == null) {
+ fields = new LazyBinaryObject[fieldRefs.size()];
+ for (int i = 0 ; i < fields.length; i++) {
+ fields[i] = LazyBinaryFactory.createLazyBinaryObject(fieldRefs.get(i).getFieldObjectInspector());
+ }
+ fieldInited = new boolean[fields.length];
+ fieldIsNull = new boolean[fields.length];
+ fieldStart = new int[fields.length];
+ fieldLength = new int[fields.length];
+ }
+
+ /**
+ * Please note that one null byte is followed by eight fields,
+ * then more null byte and fields.
+ */
+
+ int fieldId = 0;
+ int structByteEnd = start + length;
+ byte[] bytes = this.bytes.getData();
+
+ byte nullByte = bytes[start];
+ int lastFieldByteEnd = start + 1;
+ // Go through all bytes in the byte[]
+ // NOTE(review): the next line is garbled in this copy of the patch -- the
+ // body of the field-scanning loop (per-field null-bit checks, the call that
+ // fills fieldStart/fieldLength via recordInfo, and the extra-field warning)
+ // was lost in extraction, and the loop header has been spliced into the
+ // trailing missing-field "if". Do not edit without the original file.
+ for (int i=0; i structByteEnd) {
+ missingFieldWarned = true;
+ LOG.warn("Missing fields! Expected " + fields.length + " fields but "
+ + "only got " + fieldId + "! Ignoring similar problems.");
+ }
+
+ // All fields must be re-deserialized against the new byte window.
+ Arrays.fill(fieldInited, false);
+ parsed = true;
+ }
+
+ /**
+ * Get one field out of the struct.
+ *
+ * If the field is a primitive field, return the actual object.
+ * Otherwise return the LazyObject. This is because PrimitiveObjectInspector
+ * does not have control over the object used by the user - the user simply
+ * directly use the Object instead of going through
+ * Object PrimitiveObjectInspector.get(Object).
+ *
+ * @param fieldID The field ID
+ * @return The field as a LazyObject (or the primitive's object), or null
+ * when the field's null bit was set in the serialized record
+ */
+ public Object getField(int fieldID) {
+ // Lazily parse the record on first access after init().
+ if (!parsed) {
+ parse();
+ }
+ return uncheckedGetField(fieldID);
+ }
+
+ /**
+ * Get the field out of the row without checking parsed.
+ * This is called by both getField and getFieldsAsList.
+ * @param fieldID The id of the field starting from 0.
+ * @return The value of the field
+ */
+ private Object uncheckedGetField(int fieldID) {
+ // Test the length first so in most cases we avoid doing a byte[]
+ // comparison.
+ if (fieldIsNull[fieldID]) {
+ return null;
+ }
+ if (!fieldInited[fieldID]) {
+ fieldInited[fieldID] = true;
+ fields[fieldID].init(bytes, fieldStart[fieldID], fieldLength[fieldID]);
+ }
+ return fields[fieldID].getObject();
+ }
+
+ ArrayList cachedList;
+ /**
+ * Get the values of the fields as an ArrayList.
+ * @return The values of the fields as an ArrayList.
+ */
+ public ArrayList getFieldsAsList() {
+ if (!parsed) {
+ parse();
+ }
+ if (cachedList == null) {
+ cachedList = new ArrayList();
+ } else {
+ cachedList.clear();
+ }
+ for (int i=0; i