diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java
index 5731e45..12c5377 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java
@@ -18,6 +18,9 @@
package org.apache.hadoop.hive.hbase;
+import java.io.IOException;
+import java.util.Properties;
+
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
@@ -26,9 +29,6 @@
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import java.io.IOException;
-import java.util.Properties;
-
public class DefaultHBaseKeyFactory extends AbstractHBaseKeyFactory implements HBaseKeyFactory {
protected LazySimpleSerDe.SerDeParameters serdeParams;
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
index ca2f40e..aedd843 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
@@ -53,6 +53,7 @@
public static final String HBASE_COMPOSITE_KEY_CLASS = "hbase.composite.key.class";
public static final String HBASE_COMPOSITE_KEY_TYPES = "hbase.composite.key.types";
public static final String HBASE_COMPOSITE_KEY_FACTORY = "hbase.composite.key.factory";
+ public static final String HBASE_STRUCT_SERIALIZER_CLASS = "hbase.struct.serialization.class"; // table property holding the class name loaded for struct-serialized columns
public static final String HBASE_SCAN_CACHE = "hbase.scan.cache";
public static final String HBASE_SCAN_CACHEBLOCKS = "hbase.scan.cacheblock";
public static final String HBASE_SCAN_BATCH = "hbase.scan.batch";
@@ -98,7 +99,7 @@ public void initialize(Configuration conf, Properties tbl)
cachedHBaseRow = new LazyHBaseRow(
(LazySimpleStructObjectInspector) cachedObjectInspector,
- serdeParams.getKeyIndex(), serdeParams.getKeyFactory());
+ serdeParams.getKeyIndex(), serdeParams.getKeyFactory(), serdeParams.getValueFactories());
serializer = new HBaseRowSerializer(serdeParams);
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java
index 25a9cfc..9f2f02f 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java
@@ -41,6 +41,10 @@
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.avro.AvroObjectInspectorGenerator;
import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
+import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
+import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.util.StringUtils;
@@ -371,6 +375,19 @@ public static Schema getSchemaFromFS(String schemaFSUrl, Configuration conf)
}
/**
+ * Create the {@link LazyObjectBase lazy field} for the column mapping at the given index.
+ *
+ * A mapping that has no qualifier name and is not the HBase row key is treated as a whole column
+ * family and is backed by a {@link LazyHBaseCellMap}. Every other mapping gets a lazy object from
+ * {@link LazyFactory}, created with the first entry of the mapping's binary storage list.
+ *
+ * @param columnMappings the column mappings, indexed by field ID
+ * @param fieldID index into columnMappings of the field to create
+ * @param inspector object inspector of the field; it is cast to {@link LazyMapObjectInspector}
+ * when the mapping is a column family
+ * @return the lazy object for the field
+ */
+ public static LazyObjectBase createLazyField(ColumnMapping[] columnMappings, int fieldID,
+ ObjectInspector inspector) {
+ ColumnMapping colMap = columnMappings[fieldID];
+ if (colMap.getQualifierName() == null && !colMap.isHbaseRowKey()) {
+ // a column family
+ return new LazyHBaseCellMap((LazyMapObjectInspector) inspector);
+ }
+ return LazyFactory.createLazyObject(inspector, colMap.getBinaryStorage().get(0));
+ }
+
+ /**
* Auto-generates the key struct for composite keys
*
* @param compositeKeyParts map of composite key part name to its type. Usually this would be
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
index 8878eb5..9efa494 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
@@ -29,6 +29,7 @@
import org.apache.hadoop.hive.hbase.struct.AvroHBaseValueFactory;
import org.apache.hadoop.hive.hbase.struct.DefaultHBaseValueFactory;
import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
+import org.apache.hadoop.hive.hbase.struct.StructHBaseValueFactory;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
@@ -204,11 +205,21 @@ private static HBaseKeyFactory createKeyFactory(Configuration job, Properties tb
for (int i = 0; i < columnMappings.size(); i++) {
String serType = getSerializationType(conf, tbl, columnMappings.getColumnsMapping()[i]);
- if (serType != null && serType.equals(AVRO_SERIALIZATION_TYPE)) {
+ if (AVRO_SERIALIZATION_TYPE.equals(serType)) {
Schema schema = getSchema(conf, tbl, columnMappings.getColumnsMapping()[i]);
- valueFactories.add(new AvroHBaseValueFactory(schema));
+ valueFactories.add(new AvroHBaseValueFactory(i, schema));
+ } else if (STRUCT_SERIALIZATION_TYPE.equals(serType)) {
+ // the value class for struct columns is looked up by the name given in the table property
+ String structValueClassName = tbl.getProperty(HBaseSerDe.HBASE_STRUCT_SERIALIZER_CLASS);
+
+ if (structValueClassName == null) {
+ throw new IllegalArgumentException(HBaseSerDe.HBASE_STRUCT_SERIALIZER_CLASS
+ + " must be set for hbase columns of type [" + STRUCT_SERIALIZATION_TYPE + "]");
+ }
+
+ // wildcard type: the concrete class is only known by name at this point
+ Class<?> structValueClass = job.getClassByName(structValueClassName);
+ valueFactories.add(new StructHBaseValueFactory(i, structValueClass));
} else {
- valueFactories.add(new DefaultHBaseValueFactory());
+ valueFactories.add(new DefaultHBaseValueFactory(i));
}
}
} catch (Exception e) {
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java
index 3e8b8fd..6ac8423 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseRow.java
@@ -20,15 +20,15 @@
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.List;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
+import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
-import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
import org.apache.hadoop.hive.serde2.lazy.LazyStruct;
-import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -47,18 +47,21 @@
private final int iKey;
private final HBaseKeyFactory keyFactory;
+ // value factories indexed by field ID; null means lazy fields are created the default way
+ private final List<HBaseValueFactory> valueFactories;
public LazyHBaseRow(LazySimpleStructObjectInspector oi) {
- this(oi, -1, null);
+ this(oi, -1, null, null);
}
/**
* Construct a LazyHBaseRow object with the ObjectInspector.
+ *
+ * @param oi the struct object inspector for the row
+ * @param iKey ID of the field whose lazy object is created by the key factory
+ * @param keyFactory factory used to create the lazy object of the key field
+ * @param valueFactories factories used to create the lazy objects of the other fields, looked up
+ * by field ID; may be null, in which case the default lazy field creation is used
*/
- public LazyHBaseRow(LazySimpleStructObjectInspector oi, int iKey, HBaseKeyFactory keyFactory) {
+ public LazyHBaseRow(LazySimpleStructObjectInspector oi, int iKey, HBaseKeyFactory keyFactory,
+ List<HBaseValueFactory> valueFactories) {
super(oi);
this.iKey = iKey;
this.keyFactory = keyFactory;
+ this.valueFactories = valueFactories;
}
/**
@@ -76,13 +79,14 @@ protected LazyObjectBase createLazyField(int fieldID, StructField fieldRef) thro
if (fieldID == iKey) {
return keyFactory.createKey(fieldRef.getFieldObjectInspector());
}
- ColumnMapping colMap = columnsMapping[fieldID];
- if (colMap.qualifierName == null && !colMap.hbaseRowKey) {
- // a column family
- return new LazyHBaseCellMap((LazyMapObjectInspector) fieldRef.getFieldObjectInspector());
+
+ if (valueFactories != null) {
+ return valueFactories.get(fieldID).createValueObject(fieldRef.getFieldObjectInspector());
}
- return LazyFactory.createLazyObject(fieldRef.getFieldObjectInspector(),
- colMap.binaryStorage.get(0));
+
+ // fallback to default
+ return HBaseSerDeHelper.createLazyField(columnsMapping, fieldID,
+ fieldRef.getFieldObjectInspector());
}
/**
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java
index c341c0a..a2ba827 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java
@@ -48,7 +48,8 @@
*
* @param schema the associated {@link Schema schema}
* */
- public AvroHBaseValueFactory(Schema schema) {
+ public AvroHBaseValueFactory(int fieldID, Schema schema) {
+ super(fieldID); // the field ID is handed to the superclass constructor; only the schema is kept here
this.schema = schema;
}
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java
index ac2cb57..f7a425d 100644
--- hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java
@@ -21,9 +21,12 @@
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.hbase.ColumnMappings;
+import org.apache.hadoop.hive.hbase.HBaseSerDeHelper;
import org.apache.hadoop.hive.hbase.HBaseSerDeParameters;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
+import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -35,15 +38,23 @@
public class DefaultHBaseValueFactory implements HBaseValueFactory{
protected LazySimpleSerDe.SerDeParameters serdeParams;
+ protected ColumnMappings columnMappings; // assigned in init() from the HBase serde parameters
protected HBaseSerDeParameters hbaseParams;
protected Properties properties;
protected Configuration conf;
+ // index of the field this factory creates values for; never reassigned after construction
+ private final int fieldID;
+
+ /**
+ * Creates a value factory bound to a single field.
+ *
+ * @param fieldID index of the field, used to pick its entry from the column mappings when the
+ * lazy value object is created
+ */
+ public DefaultHBaseValueFactory(int fieldID) {
+ this.fieldID = fieldID;
+ }
+
@Override
public void init(HBaseSerDeParameters hbaseParams, Configuration conf, Properties properties)
throws SerDeException {
this.hbaseParams = hbaseParams;
this.serdeParams = hbaseParams.getSerdeParams();
+ this.columnMappings = hbaseParams.getColumnMappings(); // read later by createValueObject
this.properties = properties;
this.conf = conf;
}
@@ -55,6 +66,11 @@ public ObjectInspector createValueObjectInspector(TypeInfo type)
1, serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar());
}
+ @Override
+ public LazyObjectBase createValueObject(ObjectInspector inspector) throws SerDeException { // uses columnMappings, which is assigned in init()
+ return HBaseSerDeHelper.createLazyField(columnMappings.getColumnsMapping(), fieldID, inspector);
+ }
+
@Override
public byte[] serializeValue(Object object, StructField field)
throws IOException {
diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseStructValue.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseStructValue.java
new file mode 100644
index 0000000..8fba79b
--- /dev/null
+++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseStructValue.java
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.hbase.struct;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
+import org.apache.hadoop.hive.serde2.lazy.LazyObject;
+import org.apache.hadoop.hive.serde2.lazy.LazyStruct;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+
+/**
+ * This is an extension of LazyStruct. All value structs should extend this class and override the
+ * {@link LazyStruct#getField(int)} method where fieldID corresponds to the ID of a value in the
+ * value structure.
+ *
+ * For example, for a value structure "/part1/part2/part3", part1 will have an id
+ * 0, part2 will have an id 1 and part3 will have an id 2. Custom
+ * implementations of getField(fieldID) should return the value corresponding to that fieldID. So,
+ * for the above example, the value returned for getField(0) should be part1,
+ * getField(1) should be part2 and getField(2) should be part3.
+ *
+ *
+ * All implementations are expected to have a constructor of the form
+ *
+ *