columnTypes;
final private ObjectInspector oi;
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java
new file mode 100644
index 0000000..fcd2621
--- /dev/null
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.avro;
+
+import org.apache.avro.Schema;
+
+/**
+ * Retrieves the avro schema from the given source. "Source" is a loosely defined term here: it
+ * can range from an HDFS URL pointing to the schema to something as simple as a
+ * {@link java.util.Properties properties} file with a key-value mapping to the schema. For cases
+ * where the {@link Schema schema} is a part of the serialized data itself, "Source" would refer to
+ * the data bytes from which the {@link Schema schema} has to be retrieved.
+ *
+ * */
+public abstract class AvroSchemaRetriever {
+
+ /**
+ * Retrieve the writer avro schema from the given source
+ *
+ * @param source source from which the schema has to be retrieved
+ * @return the retrieved writer {@link Schema}
+ * */
+ public abstract Schema retrieveWriterSchema(Object source);
+
+ /**
+ * Retrieve the reader avro schema from the given source
+ *
+ * @param source source from which the schema has to be retrieved
+ * @return the retrieved reader {@link Schema}; this default implementation returns null
+ * */
+ public Schema retrieveReaderSchema(Object source) {
+ return null;
+ }
+
+ /**
+ * Possible offset associated with schema. This is useful when the schema is stored inline along
+ * with the data.
+ *
+ *
+ * Defaults to zero. Subclasses can override this method to provide a custom offset.
+ *
+ * */
+ public int getOffset() {
+ return 0;
+ }
+}
\ No newline at end of file
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
index 7c48e9b..5da12cb 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
@@ -57,6 +57,7 @@
public static final String EXCEPTION_MESSAGE = "Neither " + SCHEMA_LITERAL + " nor "
+ SCHEMA_URL + " specified, can't determine table schema";
public static final String AVRO_SERDE_SCHEMA = "avro.serde.schema";
+ public static final String SCHEMA_RETRIEVER = "avro.schema.retriever";
/**
* Determine the schema to that's been provided for Avro serde work.
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
index d9a38ba..e3968a9 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
@@ -53,6 +53,7 @@
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
@@ -217,11 +218,11 @@
*/
public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
- byte escapeChar) throws SerDeException {
+ byte escapeChar, ObjectInspectorOptions option) throws SerDeException {
return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence,
- escaped, escapeChar, false);
+ escaped, escapeChar, false, option);
}
-
+
/**
* Create a hierarchical ObjectInspector for LazyObject with the given
* typeInfo.
@@ -236,13 +237,54 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
* delimiting entries, the second one for delimiting key and values.
* @param nullSequence
* The sequence of bytes representing NULL.
+ * @return The ObjectInspector
+ * @throws SerDeException
+ */
+ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
+ byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
+ byte escapeChar) throws SerDeException {
+ return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence,
+ escaped, escapeChar, false, ObjectInspectorOptions.JAVA);
+ }
+
+ /**
+ * Create a hierarchical ObjectInspector for LazyObject with the given typeInfo.
+ *
+ * @param typeInfo The type information for the LazyObject
+ * @param separator The array of separators for delimiting each level
+ * @param separatorIndex The current level (for separators). List(array), struct uses 1 level of
+ * separator, and map uses 2 levels: the first one for delimiting entries, the second one
+ * for delimiting key and values.
+ * @param nullSequence The sequence of bytes representing NULL.
* @param extendedBooleanLiteral whether extended boolean literal set is legal
+ * (this overload always passes {@link ObjectInspectorOptions#JAVA} as the option)
* @return The ObjectInspector
* @throws SerDeException
*/
public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
byte escapeChar, boolean extendedBooleanLiteral) throws SerDeException {
+ return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, escaped,
+ escapeChar, extendedBooleanLiteral, ObjectInspectorOptions.JAVA);
+ }
+
+ /**
+ * Create a hierarchical ObjectInspector for LazyObject with the given typeInfo.
+ *
+ * @param typeInfo The type information for the LazyObject
+ * @param separator The array of separators for delimiting each level
+ * @param separatorIndex The current level (for separators). List(array), struct uses 1 level of
+ * separator, and map uses 2 levels: the first one for delimiting entries, the second one
+ * for delimiting key and values.
+ * @param nullSequence The sequence of bytes representing NULL.
+ * @param extendedBooleanLiteral whether extended boolean literal set is legal
+ * @param option the {@link ObjectInspectorOptions}
+ * @return The ObjectInspector
+ * @throws SerDeException
+ */
+ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
+ byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
+ byte escapeChar, boolean extendedBooleanLiteral, ObjectInspectorOptions option) throws SerDeException {
ObjectInspector.Category c = typeInfo.getCategory();
switch (c) {
case PRIMITIVE:
@@ -252,9 +294,9 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
return LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(
createLazyObjectInspector(((MapTypeInfo) typeInfo)
.getMapKeyTypeInfo(), separator, separatorIndex + 2,
- nullSequence, escaped, escapeChar, extendedBooleanLiteral), createLazyObjectInspector(
+ nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), createLazyObjectInspector(
((MapTypeInfo) typeInfo).getMapValueTypeInfo(), separator,
- separatorIndex + 2, nullSequence, escaped, escapeChar, extendedBooleanLiteral),
+ separatorIndex + 2, nullSequence, escaped, escapeChar, extendedBooleanLiteral, option),
LazyUtils.getSeparator(separator, separatorIndex),
LazyUtils.getSeparator(separator, separatorIndex+1),
nullSequence, escaped, escapeChar);
@@ -262,7 +304,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
return LazyObjectInspectorFactory.getLazySimpleListObjectInspector(
createLazyObjectInspector(((ListTypeInfo) typeInfo)
.getListElementTypeInfo(), separator, separatorIndex + 1,
- nullSequence, escaped, escapeChar, extendedBooleanLiteral), LazyUtils.getSeparator(separator, separatorIndex),
+ nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), LazyUtils.getSeparator(separator, separatorIndex),
nullSequence, escaped, escapeChar);
case STRUCT:
StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
@@ -274,19 +316,20 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
for (int i = 0; i < fieldTypeInfos.size(); i++) {
fieldObjectInspectors.add(createLazyObjectInspector(fieldTypeInfos
.get(i), separator, separatorIndex + 1, nullSequence, escaped,
- escapeChar, extendedBooleanLiteral));
+ escapeChar, extendedBooleanLiteral, option));
}
return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
fieldNames, fieldObjectInspectors,
LazyUtils.getSeparator(separator, separatorIndex),
- nullSequence, false, escaped, escapeChar);
+ nullSequence,
+ false, escaped, escapeChar, option);
case UNION:
UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
List lazyOIs = new ArrayList();
for (TypeInfo uti : unionTypeInfo.getAllUnionObjectTypeInfos()) {
lazyOIs.add(createLazyObjectInspector(uti, separator,
separatorIndex + 1, nullSequence, escaped,
- escapeChar, extendedBooleanLiteral));
+ escapeChar, extendedBooleanLiteral, option));
}
return LazyObjectInspectorFactory.getLazyUnionObjectInspector(lazyOIs,
LazyUtils.getSeparator(separator, separatorIndex),
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
index c74af4b..588cc8c 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
@@ -342,4 +342,13 @@ public void parseMultiDelimit(byte[] rawRow, byte[] fieldDelimit) {
}
return indexes;
}
-}
+
+ /**
+ * Return the data in bytes corresponding to this given struct. This is useful specifically in
+ * cases where the data is stored in serialized formats like protobufs or thrift and would need
+ * custom deserializers to be deserialized.
+ * */
+ public byte[] getBytes() {
+ return bytes.getData();
+ }
+}
\ No newline at end of file
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java
index 9f6bc3f..22fc638 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.serde2.lazy;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.io.Text;
/**
@@ -26,8 +25,7 @@
* non-primitive.
*
*/
-public class LazyUnion extends
- LazyNonPrimitive {
+public class LazyUnion extends LazyNonPrimitive {
/**
* Whether the data is already parsed or not.
*/
@@ -41,7 +39,7 @@
/**
* The object of the union.
*/
- private LazyObject<? extends ObjectInspector> field;
+ private Object field;
/**
* Tag of the Union
@@ -54,6 +52,16 @@
private boolean fieldInited = false;
/**
+ * Whether the tag has been set or not
+ * */
+ private boolean tagSet = false;
+
+ /**
+ * Whether the field has been set or not
+ * */
+ private boolean fieldSet = false;
+
+ /**
* Construct a LazyUnion object with the ObjectInspector.
*/
public LazyUnion(LazyUnionObjectInspector oi) {
@@ -123,6 +131,7 @@ private void parse() {
*
* @return The value of the field
*/
+ @SuppressWarnings("rawtypes")
private Object uncheckedGetField() {
Text nullSequence = oi.getNullSequence();
int fieldLength = start + length - startPosition;
@@ -134,9 +143,9 @@ private Object uncheckedGetField() {
if (!fieldInited) {
fieldInited = true;
- field.init(bytes, startPosition, fieldLength);
+ ((LazyObject) field).init(bytes, startPosition, fieldLength);
}
- return field.getObject();
+ return ((LazyObject) field).getObject();
}
/**
@@ -145,6 +154,10 @@ private Object uncheckedGetField() {
* @return The field as a LazyObject
*/
public Object getField() {
+ if (fieldSet) {
+ return field;
+ }
+
if (!parsed) {
parse();
}
@@ -157,9 +170,33 @@ public Object getField() {
* @return The tag byte
*/
public byte getTag() {
+ if (tagSet) {
+ return tag;
+ }
+
if (!parsed) {
parse();
}
return tag;
}
-}
+
+ /**
+ * Set the field of the union
+ *
+ * @param field the field to be set
+ * */
+ public void setField(Object field) {
+ this.field = field;
+ fieldSet = true;
+ }
+
+ /**
+ * Set the tag for the union
+ *
+ * @param tag the tag to be set
+ * */
+ public void setTag(byte tag) {
+ this.tag = tag;
+ tagSet = true;
+ }
+}
\ No newline at end of file
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
index ed670b0..1abd8a5 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
@@ -22,7 +22,9 @@
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
+import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.io.Text;
/**
@@ -48,14 +50,34 @@ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector
byte escapeChar) {
return getLazySimpleStructObjectInspector(structFieldNames,
structFieldObjectInspectors, null, separator, nullSequence,
- lastColumnTakesRest, escaped, escapeChar);
+ lastColumnTakesRest, escaped, escapeChar, ObjectInspectorOptions.JAVA);
+ }
+
+ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(
+ List structFieldNames,
+ List structFieldObjectInspectors, byte separator,
+ Text nullSequence, boolean lastColumnTakesRest, boolean escaped,
+ byte escapeChar, ObjectInspectorOptions option) {
+ return getLazySimpleStructObjectInspector(structFieldNames,
+ structFieldObjectInspectors, null, separator, nullSequence,
+ lastColumnTakesRest, escaped, escapeChar, option);
}
public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(
List structFieldNames,
List structFieldObjectInspectors, List structFieldComments,
byte separator, Text nullSequence, boolean lastColumnTakesRest,
- boolean escaped,byte escapeChar) {
+ boolean escaped, byte escapeChar) {
+ return getLazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors,
+ structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, escapeChar,
+ ObjectInspectorOptions.JAVA);
+ }
+
+ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(
+ List structFieldNames,
+ List structFieldObjectInspectors, List structFieldComments,
+ byte separator, Text nullSequence, boolean lastColumnTakesRest,
+ boolean escaped,byte escapeChar, ObjectInspectorOptions option) {
ArrayList