diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
index 4900a41..df8c70e 100644
--- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
+++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
@@ -33,6 +33,7 @@
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.SerDeUtils;
@@ -68,8 +69,8 @@
public static final String HBASE_SCAN_CACHE = "hbase.scan.cache";
public static final String HBASE_SCAN_CACHEBLOCKS = "hbase.scan.cacheblock";
public static final String HBASE_SCAN_BATCH = "hbase.scan.batch";
-
- /** Determines whether a regex matching should be done on the columns or not. Defaults to true.
+
+ /** Determines whether a regex matching should be done on the columns or not. Defaults to true.
* WARNING: Note that currently this only supports the suffix wildcard .* **/
public static final String HBASE_COLUMNS_REGEX_MATCHING = "hbase.columns.mapping.regex.matching";
@@ -126,7 +127,8 @@ public void initialize(Configuration conf, Properties tbl)
serdeParams.getNullSequence(),
serdeParams.isLastColumnTakesRest(),
serdeParams.isEscaped(),
- serdeParams.getEscapeChar());
+ serdeParams.getEscapeChar(),
+ serdeParams.isBase64Encoding());
cachedHBaseRow = new LazyHBaseRow(
(LazySimpleStructObjectInspector) cachedObjectInspector);
diff --git a/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java b/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
index 28f8d6a..8ad4224 100644
--- a/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
+++ b/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
@@ -6,30 +6,8 @@
*/
package org.apache.hadoop.hive.serde;
-import org.apache.commons.lang.builder.HashCodeBuilder;
-import org.apache.thrift.scheme.IScheme;
-import org.apache.thrift.scheme.SchemeFactory;
-import org.apache.thrift.scheme.StandardScheme;
-
-import org.apache.thrift.scheme.TupleScheme;
-import org.apache.thrift.protocol.TTupleProtocol;
-import org.apache.thrift.protocol.TProtocolException;
-import org.apache.thrift.EncodingUtils;
-import org.apache.thrift.TException;
-import java.util.List;
-import java.util.ArrayList;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.EnumMap;
-import java.util.Set;
import java.util.HashSet;
-import java.util.EnumSet;
-import java.util.Collections;
-import java.util.BitSet;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import java.util.Set;
public class serdeConstants {
@@ -49,6 +27,8 @@
public static final String SERIALIZATION_USE_JSON_OBJECTS = "serialization.use.json.object";
+ public static final String SERIALIZATION_USE_BASE64_ENCODING = "serialization.use.base64.encoding";
+
public static final String FIELD_DELIM = "field.delim";
public static final String COLLECTION_DELIM = "colelction.delim";
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
index 11f5f07..0ac1ec2 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
@@ -26,11 +26,9 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
@@ -79,6 +77,7 @@ public ColumnarSerDe() throws SerDeException {
*
* @see SerDe#initialize(Configuration, Properties)
*/
+ @Override
public void initialize(Configuration job, Properties tbl) throws SerDeException {
serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, getClass().getName());
@@ -88,7 +87,7 @@ public void initialize(Configuration job, Properties tbl) throws SerDeException
cachedObjectInspector = LazyFactory.createColumnarStructInspector(
serdeParams.getColumnNames(), serdeParams.getColumnTypes(), serdeParams
.getSeparators(), serdeParams.getNullSequence(), serdeParams
- .isEscaped(), serdeParams.getEscapeChar());
+ .isEscaped(), serdeParams.getEscapeChar(), serdeParams.isBase64Encoding());
java.util.ArrayList notSkipIDs = ColumnProjectionUtils.getReadColumnIDs(job);
@@ -114,6 +113,7 @@ public void initialize(Configuration job, Properties tbl) throws SerDeException
* @return The serialized Writable object
* @see SerDe#serialize(Object, ObjectInspector)
*/
+ @Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
if (objInspector.getCategory() != Category.STRUCT) {
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java
index ae12f20..0a14ab9 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java
@@ -18,14 +18,11 @@
package org.apache.hadoop.hive.serde2.lazy;
-import java.nio.charset.CharacterCodingException;
-
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector;
import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.Text;
public class LazyBinary extends LazyPrimitive {
@@ -49,12 +46,12 @@ public void init(ByteArrayRef bytes, int start, int length) {
byte[] recv = new byte[length];
System.arraycopy(bytes.getData(), start, recv, 0, length);
- boolean arrayByteBase64 = Base64.isArrayByteBase64(recv);
+ /*boolean arrayByteBase64 = Base64.isArrayByteBase64(recv);
if (arrayByteBase64) {
LOG.debug("Data not contains valid characters within the Base64 alphabet so " +
"decoded the data.");
- }
- byte[] decoded = arrayByteBase64 ? Base64.decodeBase64(recv) : recv;
+ }*/
+ byte[] decoded = oi.isBase64Encoding() ? Base64.decodeBase64(recv) : recv;
data.set(decoded, 0, decoded.length);
}
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
index a08b4a8..4872493 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
@@ -27,18 +27,18 @@
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector;
-import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveDecimalObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBooleanObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyByteObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDateObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDoubleObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyFloatObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyHiveDecimalObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyIntObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyLongObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyShortObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector;
-import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyDateObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyTimestampObjectInspector;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyVoidObjectInspector;
import org.apache.hadoop.hive.serde2.lazydio.LazyDioBoolean;
@@ -212,27 +212,34 @@
public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
byte escapeChar) throws SerDeException {
+ return createLazyObjectInspector(typeInfo,
+ separator, separatorIndex, nullSequence, escaped, escapeChar);
+ }
+
+ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
+ byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
+ byte escapeChar, boolean isBase64Encoding) throws SerDeException {
ObjectInspector.Category c = typeInfo.getCategory();
switch (c) {
case PRIMITIVE:
return LazyPrimitiveObjectInspectorFactory.getLazyObjectInspector(
((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(), escaped,
- escapeChar);
+ escapeChar, isBase64Encoding);
case MAP:
return LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(
createLazyObjectInspector(((MapTypeInfo) typeInfo)
- .getMapKeyTypeInfo(), separator, separatorIndex + 2,
- nullSequence, escaped, escapeChar), createLazyObjectInspector(
- ((MapTypeInfo) typeInfo).getMapValueTypeInfo(), separator,
- separatorIndex + 2, nullSequence, escaped, escapeChar),
+ .getMapKeyTypeInfo(), separator, separatorIndex + 2,
+ nullSequence, escaped, escapeChar, isBase64Encoding), createLazyObjectInspector(
+ ((MapTypeInfo) typeInfo).getMapValueTypeInfo(), separator,
+ separatorIndex + 2, nullSequence, escaped, escapeChar, isBase64Encoding),
LazyUtils.getSeparator(separator, separatorIndex),
- LazyUtils.getSeparator(separator, separatorIndex+1),
+ LazyUtils.getSeparator(separator, separatorIndex + 1),
nullSequence, escaped, escapeChar);
case LIST:
return LazyObjectInspectorFactory.getLazySimpleListObjectInspector(
createLazyObjectInspector(((ListTypeInfo) typeInfo)
.getListElementTypeInfo(), separator, separatorIndex + 1,
- nullSequence, escaped, escapeChar), LazyUtils.getSeparator(separator, separatorIndex),
+ nullSequence, escaped, escapeChar, isBase64Encoding), LazyUtils.getSeparator(separator, separatorIndex),
nullSequence, escaped, escapeChar);
case STRUCT:
StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
@@ -244,7 +251,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
for (int i = 0; i < fieldTypeInfos.size(); i++) {
fieldObjectInspectors.add(createLazyObjectInspector(fieldTypeInfos
.get(i), separator, separatorIndex + 1, nullSequence, escaped,
- escapeChar));
+ escapeChar, isBase64Encoding));
}
return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
fieldNames, fieldObjectInspectors,
@@ -256,7 +263,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
for (TypeInfo uti : unionTypeInfo.getAllUnionObjectTypeInfos()) {
lazyOIs.add(createLazyObjectInspector(uti, separator,
separatorIndex + 1, nullSequence, escaped,
- escapeChar));
+ escapeChar, isBase64Encoding));
}
return LazyObjectInspectorFactory.getLazyUnionObjectInspector(lazyOIs,
LazyUtils.getSeparator(separator, separatorIndex),
@@ -277,24 +284,37 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
* @see LazyFactory#createLazyObjectInspector(TypeInfo, byte[], int, Text,
* boolean, byte)
*/
+
public static ObjectInspector createLazyStructInspector(
List columnNames, List typeInfos, byte[] separators,
Text nullSequence, boolean lastColumnTakesRest, boolean escaped,
byte escapeChar) throws SerDeException {
+ return createLazyStructInspector(
+ columnNames, typeInfos, separators,
+ nullSequence, lastColumnTakesRest, escaped,
+ escapeChar, true);
+ }
+
+ public static ObjectInspector createLazyStructInspector(
+ List columnNames, List typeInfos, byte[] separators,
+ Text nullSequence, boolean lastColumnTakesRest, boolean escaped,
+ byte escapeChar, boolean isBase64Encoding) throws SerDeException {
ArrayList columnObjectInspectors = new ArrayList(
typeInfos.size());
for (int i = 0; i < typeInfos.size(); i++) {
columnObjectInspectors.add(LazyFactory.createLazyObjectInspector(
- typeInfos.get(i), separators, 1, nullSequence, escaped, escapeChar));
+ typeInfos.get(i), separators, 1, nullSequence, escaped, escapeChar, isBase64Encoding));
}
return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
columnNames, columnObjectInspectors, separators[0], nullSequence,
lastColumnTakesRest, escaped, escapeChar);
}
+
/**
* Create a hierarchical ObjectInspector for ColumnarStruct with the given
* columnNames and columnTypeInfos.
+ * @param isBase64Encoding
* @throws SerDeException
*
* @see LazyFactory#createLazyObjectInspector(TypeInfo, byte[], int, Text,
@@ -302,13 +322,13 @@ public static ObjectInspector createLazyStructInspector(
*/
public static ObjectInspector createColumnarStructInspector(
List columnNames, List columnTypes, byte[] separators,
- Text nullSequence, boolean escaped, byte escapeChar) throws SerDeException {
+ Text nullSequence, boolean escaped, byte escapeChar, boolean isBase64Encoding) throws SerDeException {
ArrayList columnObjectInspectors = new ArrayList(
columnTypes.size());
for (int i = 0; i < columnTypes.size(); i++) {
columnObjectInspectors
.add(LazyFactory.createLazyObjectInspector(columnTypes.get(i),
- separators, 1, nullSequence, escaped, escapeChar));
+ separators, 1, nullSequence, escaped, escapeChar, isBase64Encoding));
}
return ObjectInspectorFactory.getColumnarStructObjectInspector(columnNames,
columnObjectInspectors);
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
index 606208c..6c437f4 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java
@@ -128,6 +128,7 @@ public static byte getByte(String altValue, byte defaultVal) {
boolean escaped;
byte escapeChar;
boolean[] needsEscape;
+ boolean base64Encoding;
public List getColumnTypes() {
return columnTypes;
@@ -168,6 +169,15 @@ public byte getEscapeChar() {
public boolean[] getNeedsEscape() {
return needsEscape;
}
+
+ public boolean isBase64Encoding() {
+ return base64Encoding;
+ }
+
+ public void setBase64Encoding(boolean base64Encoding) {
+ this.base64Encoding = base64Encoding;
+ }
+
}
SerDeParameters serdeParams = null;
@@ -192,7 +202,7 @@ public void initialize(Configuration job, Properties tbl)
.getColumnNames(), serdeParams.getColumnTypes(), serdeParams
.getSeparators(), serdeParams.getNullSequence(), serdeParams
.isLastColumnTakesRest(), serdeParams.isEscaped(), serdeParams
- .getEscapeChar());
+ .getEscapeChar(), serdeParams.isBase64Encoding());
cachedLazyStruct = (LazyStruct) LazyFactory
.createLazyObject(cachedObjectInspector);
@@ -307,6 +317,12 @@ public static SerDeParameters initSerdeParams(Configuration job,
serdeParams.needsEscape[serdeParams.separators[i]] = true;
}
}
+ String strBase64 = tbl.getProperty(serdeConstants.SERIALIZATION_USE_BASE64_ENCODING, "FALSE");
+ if (strBase64.equalsIgnoreCase("TRUE")) {
+ serdeParams.base64Encoding = true;
+ } else {
+ serdeParams.base64Encoding = false;
+ }
return serdeParams;
}
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyBinaryObjectInspector.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyBinaryObjectInspector.java
index dbd60f7..bb13e11 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyBinaryObjectInspector.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyBinaryObjectInspector.java
@@ -27,9 +27,15 @@
public class LazyBinaryObjectInspector extends
AbstractPrimitiveLazyObjectInspector implements
BinaryObjectInspector {
+ protected boolean base64Encoding = true;
public LazyBinaryObjectInspector() {
+ this(true);
+ }
+
+ public LazyBinaryObjectInspector(boolean _base64Encoding) {
super(PrimitiveObjectInspectorUtils.binaryTypeEntry);
+ this.base64Encoding = _base64Encoding;
}
@Override
@@ -49,4 +55,13 @@ public Object copyObject(Object o) {
public BytesWritable getPrimitiveWritableObject(Object o) {
return null == o ? null : ((LazyBinary) o).getWritableObject();
}
+
+ public boolean isBase64Encoding() {
+ return base64Encoding;
+ }
+
+ public void setBase64Encoding(boolean base64Encoding) {
+ this.base64Encoding = base64Encoding;
+ }
+
}
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
index afbf454..a8739b4 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/primitive/LazyPrimitiveObjectInspectorFactory.java
@@ -58,7 +58,9 @@
public static final LazyTimestampObjectInspector LAZY_TIMESTAMP_OBJECT_INSPECTOR =
new LazyTimestampObjectInspector();
public static final LazyBinaryObjectInspector LAZY_BINARY_OBJECT_INSPECTOR =
- new LazyBinaryObjectInspector();
+ new LazyBinaryObjectInspector(false);
+ public static final LazyBinaryObjectInspector LAZY_BINARY_OBJECT_INSPECTOR_BASE64_ENCODING =
+ new LazyBinaryObjectInspector(true);
public static final LazyHiveDecimalObjectInspector LAZY_BIG_DECIMAL_OBJECT_INSPECTOR =
new LazyHiveDecimalObjectInspector();
@@ -80,8 +82,7 @@ public static LazyStringObjectInspector getLazyStringObjectInspector(
}
public static AbstractPrimitiveLazyObjectInspector> getLazyObjectInspector(
- PrimitiveCategory primitiveCategory, boolean escaped, byte escapeChar) {
-
+ PrimitiveCategory primitiveCategory, boolean escaped, byte escapeChar, boolean isBase64Encoding) {
switch (primitiveCategory) {
case BOOLEAN:
return LAZY_BOOLEAN_OBJECT_INSPECTOR;
@@ -100,7 +101,11 @@ public static LazyStringObjectInspector getLazyStringObjectInspector(
case STRING:
return getLazyStringObjectInspector(escaped, escapeChar);
case BINARY:
- return LAZY_BINARY_OBJECT_INSPECTOR;
+ if (isBase64Encoding) {
+ return LAZY_BINARY_OBJECT_INSPECTOR_BASE64_ENCODING;
+ } else {
+ return LAZY_BINARY_OBJECT_INSPECTOR;
+ }
case VOID:
return LAZY_VOID_OBJECT_INSPECTOR;
case DATE: