diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java index 46c3c1a48e533623e0d2c63ad9d7773e67e1de85..370d7f7395e758b11dd6bfae5fc1f910d5b0ae15 100644 --- a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java @@ -30,7 +30,7 @@ import org.apache.hadoop.hive.serde2.ByteStream; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazyUtils; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java index 66ab01e377cf12bf09f81ca5d1eec7de76275868..ff4f86c049ca73cecc7acf346aee9730b6547516 100644 --- a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java @@ -32,7 +32,7 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDeParameters.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDeParameters.java index ef77697907a18f42ae8a623a04ed1b086efc189c..d0799b3ca79dbb1f9ae197db4722129edf3e0c80 100644 --- a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDeParameters.java +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDeParameters.java @@ -30,8 +30,7 @@ import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDeException; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.util.ReflectionUtils; import org.apache.log4j.Logger; @@ -71,7 +70,7 @@ public AccumuloSerDeParameters(Configuration conf, Properties tableProperties, S this.tableProperties = tableProperties; this.serdeName = serdeName; - lazySerDeParameters = LazySimpleSerDe.initSerdeParams(conf, tableProperties, serdeName); + lazySerDeParameters = new SerDeParameters(conf, tableProperties, serdeName); // The default encoding for this table when not otherwise specified String defaultStorage = tableProperties.getProperty(DEFAULT_STORAGE_TYPE); diff --git 
a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/DefaultAccumuloRowIdFactory.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/DefaultAccumuloRowIdFactory.java index 11806791abb86d80a8762dbe8ce717b476143981..cbf17c033ee433e4f031599ccd47a283d24d7170 100644 --- a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/DefaultAccumuloRowIdFactory.java +++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/DefaultAccumuloRowIdFactory.java @@ -29,7 +29,7 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -42,7 +42,7 @@ public class DefaultAccumuloRowIdFactory implements AccumuloRowIdFactory { protected AccumuloSerDeParameters accumuloSerDeParams; - protected LazySimpleSerDe.SerDeParameters serdeParams; + protected SerDeParameters serdeParams; protected Properties properties; protected HiveAccumuloRowIdColumnMapping rowIdMapping; protected AccumuloRowSerializer serializer; diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestLazyAccumuloRow.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestLazyAccumuloRow.java index e0b51cbeee3ee0cccf21749915ded0d398c4bbb5..3943a6d040e6330cdce37712cf00815fd6e65cba 100644 --- a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestLazyAccumuloRow.java +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestLazyAccumuloRow.java @@ -33,7 +33,7 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; import org.apache.hadoop.hive.serde2.lazy.LazyInteger; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazyString; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.hadoop.hive.serde2.lazydio.LazyDioInteger; @@ -58,7 +58,7 @@ public void testExpectedDeserializationOfColumns() throws Exception { TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo); LazySimpleStructObjectInspector objectInspector = (LazySimpleStructObjectInspector) LazyFactory - .createLazyStructInspector(columns, types, LazySimpleSerDe.DefaultSeparators, new Text( + .createLazyStructInspector(columns, types, SerDeParameters.DefaultSeparators, new Text( "\\N"), false, false, (byte) '\\'); DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory(); @@ -119,7 +119,7 @@ public void testDeserializationOfBinaryEncoding() throws Exception { TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.intTypeInfo); LazySimpleStructObjectInspector objectInspector = (LazySimpleStructObjectInspector) LazyFactory - .createLazyStructInspector(columns, types, LazySimpleSerDe.DefaultSeparators, new Text( + .createLazyStructInspector(columns, types, SerDeParameters.DefaultSeparators, new Text( "\\N"), false, false, (byte) '\\'); DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory(); @@ -202,7 +202,7 @@ public void testNullInit() throws SerDeException { 
TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME)); LazySimpleStructObjectInspector objectInspector = (LazySimpleStructObjectInspector) LazyFactory - .createLazyStructInspector(columns, types, LazySimpleSerDe.DefaultSeparators, new Text( + .createLazyStructInspector(columns, types, SerDeParameters.DefaultSeparators, new Text( "\\N"), false, false, (byte) '\\'); DefaultAccumuloRowIdFactory rowIdFactory = new DefaultAccumuloRowIdFactory(); diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTableOutputFormat.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTableOutputFormat.java index 706b26e9f9c645e4f57dc0b2d33b7db852756f12..47b2a1e65f241200558a1729568a7512aeb6758b 100644 --- a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTableOutputFormat.java +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTableOutputFormat.java @@ -43,7 +43,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazyStruct; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory; diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloRowSerializer.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloRowSerializer.java index f613a581026cdf3b42d7c14a9a56dc8fc3384d58..86f40cf1fae9b483079552b6c4146b27ae690c63 100644 --- a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloRowSerializer.java +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloRowSerializer.java @@ -35,7 +35,7 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazyStruct; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory; diff --git a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloSerDe.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloSerDe.java index 18b84e4d896697730ba377c549e2fd7e50bf1878..d8635cc17c4f1e4b8801fe2263276982c5f30d88 100644 --- a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloSerDe.java +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloSerDe.java @@ -44,7 +44,7 @@ import org.apache.hadoop.hive.serde2.lazy.LazyArray; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; import org.apache.hadoop.hive.serde2.lazy.LazyMap; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazyString; import org.apache.hadoop.hive.serde2.lazy.LazyStruct; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; diff --git 
a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestDefaultAccumuloRowIdFactory.java b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestDefaultAccumuloRowIdFactory.java index d464740014ed3d0528bba3ee294a6fdfa366e412..050858fdb435f2e4cf3591c36bb9bbfee6aff6b2 100644 --- a/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestDefaultAccumuloRowIdFactory.java +++ b/accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestDefaultAccumuloRowIdFactory.java @@ -25,7 +25,7 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazyString; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; @@ -33,8 +33,6 @@ import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyPrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaStringObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.junit.Assert; import org.junit.Test; diff --git a/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java b/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java index 72f42341aba0ee2824019ef97b59221f3510f6b6..9b337ae2385548f8f72194256c157cad6a729060 100644 --- a/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java +++ b/contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java @@ -43,6 +43,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; @@ -62,7 +63,8 @@ serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST, serdeConstants.ESCAPE_CHAR, serdeConstants.SERIALIZATION_ENCODING, - LazySimpleSerDe.SERIALIZATION_EXTEND_NESTING_LEVELS}) + SerDeParameters.SERIALIZATION_EXTEND_NESTING_LEVELS, + SerDeParameters.SERIALIZATION_EXTEND_ADDITIONAL_NESTING_LEVELS}) public class MultiDelimitSerDe extends AbstractSerDe { private static final Log LOG = LogFactory.getLog(MultiDelimitSerDe.class.getName()); private static final byte[] DEFAULT_SEPARATORS = {(byte) 1, (byte) 2, (byte) 3}; @@ -85,7 +87,7 @@ // The wrapper for byte array private ByteArrayRef byteArrayRef; - private LazySimpleSerDe.SerDeParameters serdeParams = null; + private SerDeParameters serdeParams = null; // The output stream of serialized objects private final ByteStream.Output serializeStream = new ByteStream.Output(); // The Writable to return in serialize @@ -94,7 +96,7 @@ @Override public void initialize(Configuration conf, Properties tbl) throws SerDeException { // get the SerDe parameters - serdeParams = 
LazySimpleSerDe.initSerdeParams(conf, tbl, getClass().getName()); + serdeParams = new SerDeParameters(conf, tbl, getClass().getName()); fieldDelimited = tbl.getProperty(serdeConstants.FIELD_DELIM); if (fieldDelimited == null || fieldDelimited.isEmpty()) { @@ -103,12 +105,12 @@ public void initialize(Configuration conf, Properties tbl) throws SerDeException // get the collection separator and map key separator // TODO: use serdeConstants.COLLECTION_DELIM when the typo is fixed - collSep = LazySimpleSerDe.getByte(tbl.getProperty(COLLECTION_DELIM), + collSep = LazyUtils.getByte(tbl.getProperty(COLLECTION_DELIM), DEFAULT_SEPARATORS[1]); - keySep = LazySimpleSerDe.getByte(tbl.getProperty(serdeConstants.MAPKEY_DELIM), + keySep = LazyUtils.getByte(tbl.getProperty(serdeConstants.MAPKEY_DELIM), DEFAULT_SEPARATORS[2]); - serdeParams.getSeparators()[1] = collSep; - serdeParams.getSeparators()[2] = keySep; + serdeParams.setSeparator(1, collSep); + serdeParams.setSeparator(2, keySep); // Create the ObjectInspectors for the fields cachedObjectInspector = LazyFactory.createLazyStructInspector(serdeParams @@ -200,7 +202,7 @@ public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDe // is the binary data. private static void serializeNoEncode(ByteStream.Output out, Object obj, ObjectInspector objInspector, byte[] separators, int level, - Text nullSequence, boolean escaped, byte escapeChar, boolean[] needsEscape) + Text nullSequence, boolean escaped, byte escapeChar, Map needsEscape) throws IOException, SerDeException { if (obj == null) { out.write(nullSequence.getBytes(), 0, nullSequence.getLength()); diff --git a/data/files/nestedcomplex_additional.txt b/data/files/nestedcomplex_additional.txt new file mode 100755 index 0000000000000000000000000000000000000000..c114272d80ed55a317f5e9ba15f10a480f1ba77f --- /dev/null +++ b/data/files/nestedcomplex_additional.txt @@ -0,0 +1,2 @@ +1123k2v2k1v1a10100 +1123k2v2k1v1a10100 diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java index d3e5c7505ad90a46ad32b8986a0c550242023d55..4fe8734f0cfa2bbb90e77973a8f593219ac185f6 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/DefaultHBaseKeyFactory.java @@ -25,7 +25,7 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java index 5d9cba76fb3745141b9d8a01df7852f1dcd8c11c..0cb4c48b05fa51d4373d73291fcd9cc2975327c3 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java @@ -23,7 +23,7 @@ import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory; import org.apache.hadoop.hive.serde2.SerDeException; -import 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters; import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl; diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java index 3bbab204b2beae3b7e6916280447798d733f5e9c..4e89aa147cc1c44466e8b6085621475d319d8b78 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java @@ -27,7 +27,7 @@ import org.apache.hadoop.hive.serde2.ByteStream; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.lazy.LazyUtils; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; @@ -44,7 +44,7 @@ private final HBaseKeyFactory keyFactory; private final HBaseSerDeParameters hbaseParam; - private final LazySimpleSerDe.SerDeParameters serdeParam; + private final SerDeParameters serdeParam; private final int keyIndex; private final int timestampIndex; @@ -54,9 +54,7 @@ private final byte[] separators; // the separators array private final boolean escaped; // whether we need to escape the data when writing out private final byte escapeChar; // which char to use as the escape char, e.g. '\\' - private final boolean[] needsEscape; // which chars need to be escaped. This array should have size - // of 128. Negative byte values (or byte values >= 128) are - // never escaped. + private final Map needsEscape; // which chars need to be escaped. 
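The boolean[128] escape table is replaced here (and in the new SerDeParameters class below) with a Map<Byte, Boolean>, which can flag any byte value, including the negative bytes that the extended separator set now draws from. A minimal sketch of a map-based escape check; the helper class and method names are illustrative, not part of the patch:

    import java.io.ByteArrayOutputStream;
    import java.util.HashMap;
    import java.util.Map;

    public class NeedsEscapeSketch {
      // Hypothetical helper: write one byte, prefixing the escape char
      // whenever the map flags that byte. Keys are full byte values, so
      // negative bytes (>= 128 unsigned) can be escaped too, which a
      // boolean[128] table could not represent.
      static void writeEscaped(ByteArrayOutputStream out, byte b,
          byte escapeChar, Map<Byte, Boolean> needsEscape) {
        Boolean flagged = needsEscape.get(b);
        if (flagged != null && flagged) {
          out.write(escapeChar);
        }
        out.write(b);
      }

      public static void main(String[] args) {
        Map<Byte, Boolean> needsEscape = new HashMap<Byte, Boolean>();
        needsEscape.put((byte) 1, true);    // field delimiter ^A
        needsEscape.put((byte) -100, true); // a separator from the extended set
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        writeEscaped(out, (byte) 1, (byte) '\\', needsEscape);   // escaped
        writeEscaped(out, (byte) 'x', (byte) '\\', needsEscape); // written as-is
        System.out.println(out.size()); // 3 bytes: '\', 0x01, 'x'
      }
    }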
private final long putTimestamp; private final ByteStream.Output output = new ByteStream.Output(); diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java index 47e20d5d747616b3855f8e461eb0054d28b7a6ef..73bfd2ec87d5b34ec7e359b74c43984ab4ba8b54 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java @@ -34,7 +34,7 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeSpec; import org.apache.hadoop.hive.serde2.SerDeStats; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Writable; @@ -51,7 +51,8 @@ serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST, serdeConstants.ESCAPE_CHAR, serdeConstants.SERIALIZATION_ENCODING, - LazySimpleSerDe.SERIALIZATION_EXTEND_NESTING_LEVELS, + SerDeParameters.SERIALIZATION_EXTEND_NESTING_LEVELS, + SerDeParameters.SERIALIZATION_EXTEND_ADDITIONAL_NESTING_LEVELS, HBaseSerDe.HBASE_COLUMNS_MAPPING, HBaseSerDe.HBASE_TABLE_NAME, HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, @@ -240,7 +241,7 @@ public static ColumnMappings parseColumnsMapping( return new ColumnMappings(columnsMapping, rowKeyIndex, timestampIndex); } - public LazySimpleSerDe.SerDeParameters getSerdeParams() { + public SerDeParameters getSerdeParams() { return serdeParams.getSerdeParams(); } diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java index a43520cdca79a6563c063ead6b8a23fdf787d3d1..6fb2ffeaaa2391671e9fc0300e3cee7ca1da382a 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java @@ -33,9 +33,8 @@ import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.util.ReflectionUtils; import javax.annotation.Nullable; @@ -92,7 +91,7 @@ columnMappings.toTypesString(tbl, job, autogenerate)); } - this.serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName); + this.serdeParams = new SerDeParameters(job, tbl, serdeName); this.putTimestamp = Long.valueOf(tbl.getProperty(HBaseSerDe.HBASE_PUT_TIMESTAMP, "-1")); columnMappings.setHiveColumnDescription(serdeName, serdeParams.getColumnNames(), diff --git a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java index 51a0e225706d0e274073af8ab4d984f8e13d5a46..081df5f61e089abb1487e26b6926a0dd70d69918 100644 --- a/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java +++ b/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java @@ -27,7 +27,7 @@ import 
org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.lazy.LazyFactory; import org.apache.hadoop.hive.serde2.lazy.LazyObjectBase; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -38,7 +38,7 @@ * */ public class DefaultHBaseValueFactory implements HBaseValueFactory { - protected LazySimpleSerDe.SerDeParameters serdeParams; + protected SerDeParameters serdeParams; protected ColumnMappings columnMappings; protected HBaseSerDeParameters hbaseParams; protected Properties properties; diff --git a/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java index 42b24443490503274b3b58002a776dea469e6b90..08462865e9de39f03cc71a91b4ea1c7e2a9f2b19 100644 --- a/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java +++ b/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java @@ -55,13 +55,13 @@ import org.apache.hadoop.hive.hbase.avro.OfficePhone; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.hive.serde2.lazy.LazyStruct; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -1413,7 +1413,7 @@ private Properties createPropertiesForHiveAvroColumnFamilyMap() { "org.apache.hadoop.hive.hbase.avro.Employee"); tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, "cola:prefixB_.*"); tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true"); - tbl.setProperty(LazySimpleSerDe.SERIALIZATION_EXTEND_NESTING_LEVELS, "true"); + tbl.setProperty(SerDeParameters.SERIALIZATION_EXTEND_NESTING_LEVELS, "true"); return tbl; } diff --git a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/DelimitedInputWriter.java b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/DelimitedInputWriter.java index 36834b1f42925abfc0ebee44dde56cd407fda1d3..d8e9407dd755c3ae94575b073ebd9c22c5e136b7 100644 --- a/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/DelimitedInputWriter.java +++ b/hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/DelimitedInputWriter.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.io.BytesWritable; import java.io.IOException; @@ -89,7 +90,7 @@ public DelimitedInputWriter(String[] colNamesForFields, String delimiter, throws ClassNotFoundException, ConnectionError, SerializationError, InvalidColumn, StreamingException { this(colNamesForFields, delimiter, endPoint, conf, - (char) 
LazySimpleSerDe.DefaultSeparators[0]); + (char) SerDeParameters.DefaultSeparators[0]); } /** diff --git a/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java b/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java index 6d8760627b66728ef43dd9d0fd3ccba0508429da..9923acedd20a1a4f84b0fa711a70769855648800 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java @@ -34,8 +34,8 @@ import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.FileSplit; import org.apache.hadoop.mapred.JobConf; @@ -144,7 +144,7 @@ private void add(Text line) throws HiveException { int firstEnd = 0; int i = 0; for (int index = 0; index < bytes.length; index++) { - if (bytes[index] == LazySimpleSerDe.DefaultSeparators[0]) { + if (bytes[index] == SerDeParameters.DefaultSeparators[0]) { i++; firstEnd = index; } @@ -169,7 +169,7 @@ private void add(Text line) throws HiveException { int currentStart = firstEnd + 1; int currentEnd = firstEnd + 1; for (; currentEnd < bytes.length; currentEnd++) { - if (bytes[currentEnd] == LazySimpleSerDe.DefaultSeparators[1]) { + if (bytes[currentEnd] == SerDeParameters.DefaultSeparators[1]) { String one_offset = new String(bytes, currentStart, currentEnd - currentStart); Long offset = Long.parseLong(one_offset); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java index b62ffed94b86dfd9061e7ec67e36338efb3a0250..528e42ab5b5207eee51d6ed82a105fa6ac771ef5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/PlanUtils.java @@ -65,6 +65,7 @@ import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.SequenceFileInputFormat; @@ -287,7 +288,7 @@ public static TableDesc getDefaultQueryOutputTableDesc(String cols, String colTy tblDesc.getProperties().setProperty(serdeConstants.ESCAPE_CHAR, "\\"); //enable extended nesting levels tblDesc.getProperties().setProperty( - LazySimpleSerDe.SERIALIZATION_EXTEND_NESTING_LEVELS, "true"); + SerDeParameters.SERIALIZATION_EXTEND_ADDITIONAL_NESTING_LEVELS, "true"); return tblDesc; } diff --git a/ql/src/test/queries/clientpositive/nested_complex_additional.q b/ql/src/test/queries/clientpositive/nested_complex_additional.q new file mode 100644 index 0000000000000000000000000000000000000000..5677737d9af8d8950afa50bfe8238620de9672cd --- /dev/null +++ b/ql/src/test/queries/clientpositive/nested_complex_additional.q @@ -0,0 +1,21 @@ +create table nestedcomplex_additional ( +simple_int int, +max_nested_array array>>>>>>>>>>>>>>>>>>>>>>>>, +max_nested_map array>>>>>>>>>>>>>>>>>>>>>>>>>, +max_nested_struct array>>>>>>>>>>>>>>>>>>>>>>>>>, +simple_string string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ( + 
'hive.serialization.extend.additional.nesting.levels'='true', + 'line.delim'='\n' +) +; + +describe nestedcomplex_additional; +describe extended nestedcomplex_additional; + + +load data local inpath '../../data/files/nestedcomplex_additional.txt' overwrite into table nestedcomplex_additional; + +select * from nestedcomplex_additional sort by simple_int; diff --git a/ql/src/test/results/clientpositive/nested_complex_additional.q.out b/ql/src/test/results/clientpositive/nested_complex_additional.q.out new file mode 100644 index 0000000000000000000000000000000000000000..9da9d2f7c4403a7a8795f889b162904b2bafcd98 --- /dev/null +++ b/ql/src/test/results/clientpositive/nested_complex_additional.q.out @@ -0,0 +1,72 @@ +PREHOOK: query: create table nestedcomplex_additional ( +simple_int int, +max_nested_array array>>>>>>>>>>>>>>>>>>>>>>>>, +max_nested_map array>>>>>>>>>>>>>>>>>>>>>>>>>, +max_nested_struct array>>>>>>>>>>>>>>>>>>>>>>>>>, +simple_string string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ( + 'hive.serialization.extend.additional.nesting.levels'='true', + 'line.delim'='\n' +) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@nestedcomplex_additional +POSTHOOK: query: create table nestedcomplex_additional ( +simple_int int, +max_nested_array array>>>>>>>>>>>>>>>>>>>>>>>>, +max_nested_map array>>>>>>>>>>>>>>>>>>>>>>>>>, +max_nested_struct array>>>>>>>>>>>>>>>>>>>>>>>>>, +simple_string string) +ROW FORMAT SERDE + 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' +WITH SERDEPROPERTIES ( + 'hive.serialization.extend.additional.nesting.levels'='true', + 'line.delim'='\n' +) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@nestedcomplex_additional +PREHOOK: query: describe nestedcomplex_additional +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nestedcomplex_additional +POSTHOOK: query: describe nestedcomplex_additional +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nestedcomplex_additional +simple_int int +max_nested_array array>>>>>>>>>>>>>>>>>>>>>>>> +max_nested_map array>>>>>>>>>>>>>>>>>>>>>>>>> +max_nested_struct array>>>>>>>>>>>>>>>>>>>>>>>>> +simple_string string +PREHOOK: query: describe extended nestedcomplex_additional +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@nestedcomplex_additional +POSTHOOK: query: describe extended nestedcomplex_additional +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@nestedcomplex_additional +simple_int int +max_nested_array array>>>>>>>>>>>>>>>>>>>>>>>> +max_nested_map array>>>>>>>>>>>>>>>>>>>>>>>>> +max_nested_struct array>>>>>>>>>>>>>>>>>>>>>>>>> +simple_string string + +#### A masked pattern was here #### +PREHOOK: query: load data local inpath '../../data/files/nestedcomplex_additional.txt' overwrite into table nestedcomplex_additional +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@nestedcomplex_additional +POSTHOOK: query: load data local inpath '../../data/files/nestedcomplex_additional.txt' overwrite into table nestedcomplex_additional +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@nestedcomplex_additional +PREHOOK: query: select * from nestedcomplex_additional sort by simple_int +PREHOOK: type: QUERY +PREHOOK: Input: default@nestedcomplex_additional +#### A masked pattern was here #### +POSTHOOK: query: select * from nestedcomplex_additional sort by simple_int +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@nestedcomplex_additional
#### A masked pattern was here ####
1	[[[[[[[[[[[[[[[[[[[[[[[[[1,2,3]]]]]]]]]]]]]]]]]]]]]]]]]	[[[[[[[[[[[[[[[[[[[[[[[[[{"k2":"v2","k1":"v1"}]]]]]]]]]]]]]]]]]]]]]]]]]	[[[[[[[[[[[[[[[[[[[[[[[[[{"s":"a","i":10}]]]]]]]]]]]]]]]]]]]]]]]]]	100
1	[[[[[[[[[[[[[[[[[[[[[[[[[1,2,3]]]]]]]]]]]]]]]]]]]]]]]]]	[[[[[[[[[[[[[[[[[[[[[[[[[{"k2":"v2","k1":"v1"}]]]]]]]]]]]]]]]]]]]]]]]]]	[[[[[[[[[[[[[[[[[[[[[[[[[{"s":"a","i":10}]]]]]]]]]]]]]]]]]]]]]]]]]	100
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeParameters.java b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeParameters.java
new file mode 100644
index 0000000000000000000000000000000000000000..fdca13208757e0a6a30fc011494cf0d815768b25
--- /dev/null
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeParameters.java
@@ -0,0 +1,248 @@
+package org.apache.hadoop.hive.serde2;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParameters;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hive.common.util.HiveStringUtils;
+
+/**
+ * SerDeParameters.
+ *
+ */
+public class SerDeParameters implements LazyObjectInspectorParameters {
+  public static final byte[] DefaultSeparators = {(byte) 1, (byte) 2, (byte) 3};
+  public static final String SERIALIZATION_EXTEND_NESTING_LEVELS
+      = "hive.serialization.extend.nesting.levels";
+  public static final String SERIALIZATION_EXTEND_ADDITIONAL_NESTING_LEVELS
+      = "hive.serialization.extend.additional.nesting.levels";
+
+  private Properties tableProperties;
+  private String serdeName;
+
+  // The list of bytes used for the separators in the column (a nested struct
+  // such as Array<Array<String>> will use multiple separators).
+  // The list of separators + escapeChar are the bytes required to be escaped.
+  private byte[] separators;
+
+  private String nullString;
+  private Text nullSequence;
+  private TypeInfo rowTypeInfo;
+  private boolean lastColumnTakesRest;
+  private List<String> columnNames;
+  private List<TypeInfo> columnTypes;
+
+  private boolean escaped;
+  private byte escapeChar;
+  // The map from the byte to the flag indicating if the byte needs escape
+  private Map<Byte, Boolean> needsEscape = new HashMap<Byte, Boolean>();
+
+  private boolean extendedBooleanLiteral;
+  List<String> timestampFormats;
+
+  public SerDeParameters(Configuration job, Properties tbl, String serdeName) throws SerDeException {
+    this.tableProperties = tbl;
+    this.serdeName = serdeName;
+
+    nullString = tbl.getProperty(
+        serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N");
+    nullSequence = new Text(nullString);
+
+    String lastColumnTakesRestString = tbl
+        .getProperty(serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST);
+    lastColumnTakesRest = (lastColumnTakesRestString != null && lastColumnTakesRestString
+        .equalsIgnoreCase("true"));
+
+    extractColumnInfo();
+
+    // Create the LazyObject for storing the rows
+    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+
+    collectSeparators(tbl);
+
+    // Get the escape information
+    String escapeProperty = tbl.getProperty(serdeConstants.ESCAPE_CHAR);
+    escaped = (escapeProperty != null);
+    if (escaped) {
+      escapeChar = LazyUtils.getByte(escapeProperty, (byte) '\\');
+      needsEscape.put(escapeChar, true);
+      for (byte b : separators) {
+        needsEscape.put(b, true);
+      }
+    }
+
+    extendedBooleanLiteral = (job == null ? false :
+        job.getBoolean(ConfVars.HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL.varname, false));
+
+    String[] timestampFormatsArray =
+        HiveStringUtils.splitAndUnEscape(tbl.getProperty(serdeConstants.TIMESTAMP_FORMATS));
+    if (timestampFormatsArray != null) {
+      timestampFormats = Arrays.asList(timestampFormatsArray);
+    }
+  }
+
+  /**
+   * Extracts and sets the column names and column types from the table properties.
+   * @throws SerDeException
+   */
+  public void extractColumnInfo() throws SerDeException {
+    // Read the configuration parameters
+    String columnNameProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMNS);
+    // NOTE: if "columns.types" is missing, all columns will be of String type
+    String columnTypeProperty = tableProperties.getProperty(serdeConstants.LIST_COLUMN_TYPES);
+
+    // Parse the configuration parameters
+    if (columnNameProperty != null && columnNameProperty.length() > 0) {
+      columnNames = Arrays.asList(columnNameProperty.split(","));
+    } else {
+      columnNames = new ArrayList<String>();
+    }
+    if (columnTypeProperty == null) {
+      // Default type: all string
+      StringBuilder sb = new StringBuilder();
+      for (int i = 0; i < columnNames.size(); i++) {
+        if (i > 0) {
+          sb.append(":");
+        }
+        sb.append(serdeConstants.STRING_TYPE_NAME);
+      }
+      columnTypeProperty = sb.toString();
+    }
+
+    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+
+    if (columnNames.size() != columnTypes.size()) {
+      throw new SerDeException(serdeName + ": columns has " + columnNames.size()
+          + " elements while columns.types has " + columnTypes.size() + " elements!");
+    }
+  }
+
+  public List<TypeInfo> getColumnTypes() {
+    return columnTypes;
+  }
+
+  public List<String> getColumnNames() {
+    return columnNames;
+  }
+
+  public byte[] getSeparators() {
+    return separators;
+  }
+
+  public String getNullString() {
+    return nullString;
+  }
+
+  public Text getNullSequence() {
+    return nullSequence;
+  }
+
+  public TypeInfo getRowTypeInfo() {
+    return rowTypeInfo;
+  }
+
+  public boolean isLastColumnTakesRest() {
+    return lastColumnTakesRest;
+  }
+
+  public boolean isEscaped() {
+    return escaped;
+  }
+
+  public byte getEscapeChar() {
+    return escapeChar;
+  }
+
+  public Map<Byte, Boolean> getNeedsEscape() {
+    return needsEscape;
+  }
+
+  public boolean isExtendedBooleanLiteral() {
+    return extendedBooleanLiteral;
+  }
+
+  public List<String> getTimestampFormats() {
+    return timestampFormats;
+  }
+
+  public void setSeparator(int index, byte separator) throws SerDeException {
+    if (index < 0 || index >= separators.length) {
+      throw new SerDeException("Invalid separator array index value: " + index);
+    }
+
+    separators[index] = separator;
+  }
+
+  /**
+   * To be backward-compatible, initialize the first 3 separators to
+   * the given values. The default number of separators is 8; if only
+   * hive.serialization.extend.nesting.levels is set, the number of
+   * separators is extended to 24; if
+   * hive.serialization.extend.additional.nesting.levels is set, it is
+   * extended to 154.
+   * @param tbl
+   */
+  private void collectSeparators(Properties tbl) {
+    List<Byte> separatorCandidates = new ArrayList<Byte>();
+
+    String extendNestingValue = tbl.getProperty(SERIALIZATION_EXTEND_NESTING_LEVELS);
+    String extendAdditionalNestingValue = tbl.getProperty(SERIALIZATION_EXTEND_ADDITIONAL_NESTING_LEVELS);
+    boolean extendedNesting = extendNestingValue != null && extendNestingValue.equalsIgnoreCase("true");
+    boolean extendedAdditionalNesting = extendAdditionalNestingValue != null
+        && extendAdditionalNestingValue.equalsIgnoreCase("true");
+
+    separatorCandidates.add(LazyUtils.getByte(tbl.getProperty(serdeConstants.FIELD_DELIM,
+        tbl.getProperty(serdeConstants.SERIALIZATION_FORMAT)), DefaultSeparators[0]));
+    separatorCandidates.add(LazyUtils.getByte(tbl
+        .getProperty(serdeConstants.COLLECTION_DELIM), DefaultSeparators[1]));
+    separatorCandidates.add(LazyUtils.getByte(
+        tbl.getProperty(serdeConstants.MAPKEY_DELIM), DefaultSeparators[2]));
+
+    // Use only control chars that are very unlikely to be part of the string;
+    // the following are likely to be used in text files for strings:
+    // 9 (horizontal tab, HT, \t, ^I)
+    // 10 (line feed, LF, \n, ^J),
+    // 12 (form feed, FF, \f, ^L),
+    // 13 (carriage return, CR, \r, ^M),
+    // 27 (escape, ESC, \e [GCC only], ^[).
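With SerDeParameters promoted to a standalone class in org.apache.hadoop.hive.serde2, every caller in this patch (LazySimpleSerDe, ColumnarSerDe, the Accumulo and HBase handlers, MultiDelimitSerDe) initializes it through the constructor instead of LazySimpleSerDe.initSerdeParams. A sketch of how the two nesting-level properties change the separator array built by collectSeparators; the literal keys "columns" and "columns.types" stand in for serdeConstants.LIST_COLUMNS and serdeConstants.LIST_COLUMN_TYPES, and the counts follow from the candidate list above (3 configured + 5 + 1 + 13 + 4 control chars + 128 negative bytes = 154):

    import java.util.Properties;

    import org.apache.hadoop.hive.serde2.SerDeException;
    import org.apache.hadoop.hive.serde2.SerDeParameters;

    public class NestingLevelsSketch {
      public static void main(String[] args) throws SerDeException {
        Properties tbl = new Properties();
        tbl.setProperty("columns", "k,v");
        tbl.setProperty("columns.types", "string:string");

        // Default: 8 single-byte separators, unchanged behavior.
        System.out.println(new SerDeParameters(null, tbl, "sketch").getSeparators().length); // 8

        tbl.setProperty(SerDeParameters.SERIALIZATION_EXTEND_NESTING_LEVELS, "true");
        System.out.println(new SerDeParameters(null, tbl, "sketch").getSeparators().length); // 24

        // The "additional" property wins when both are set.
        tbl.setProperty(SerDeParameters.SERIALIZATION_EXTEND_ADDITIONAL_NESTING_LEVELS, "true");
        System.out.println(new SerDeParameters(null, tbl, "sketch").getSeparators().length); // 154
      }
    }

A null Configuration is accepted; the constructor only consults it for the extended-boolean-literal flag.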
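Similarly, MultiDelimitSerDe above no longer writes into the array returned by getSeparators() but goes through the new bounds-checked setSeparator(int, byte). A short sketch of that override pattern, assuming table properties along the lines of the previous example:

    import java.util.Properties;

    import org.apache.hadoop.hive.serde.serdeConstants;
    import org.apache.hadoop.hive.serde2.SerDeException;
    import org.apache.hadoop.hive.serde2.SerDeParameters;
    import org.apache.hadoop.hive.serde2.lazy.LazyUtils;

    public class SetSeparatorSketch {
      public static void main(String[] args) throws SerDeException {
        Properties tbl = new Properties();
        tbl.setProperty("columns", "m");
        tbl.setProperty("columns.types", "map<string,string>");

        SerDeParameters params = new SerDeParameters(null, tbl, "sketch");

        // Override the collection and map-key delimiters the way
        // MultiDelimitSerDe does; LazyUtils.getByte falls back to the
        // default when the property is unset.
        params.setSeparator(1, LazyUtils.getByte(
            tbl.getProperty(serdeConstants.COLLECTION_DELIM), SerDeParameters.DefaultSeparators[1]));
        params.setSeparator(2, LazyUtils.getByte(
            tbl.getProperty(serdeConstants.MAPKEY_DELIM), SerDeParameters.DefaultSeparators[2]));

        // params.setSeparator(200, (byte) 4); // out of range: SerDeException
      }
    }

Routing writes through a setter keeps the index check in one place instead of handing callers the raw separator array.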
+    for (byte b = 4; b <= 8; b++) {
+      separatorCandidates.add(b);
+    }
+    separatorCandidates.add((byte) 11);
+    for (byte b = 14; b <= 26; b++) {
+      separatorCandidates.add(b);
+    }
+    for (byte b = 28; b <= 31; b++) {
+      separatorCandidates.add(b);
+    }
+
+    for (byte b = -128; b <= -1; b++) {
+      separatorCandidates.add(b);
+    }
+
+    int numSeparators = 8;
+    if (extendedAdditionalNesting) {
+      numSeparators = separatorCandidates.size();
+    } else if (extendedNesting) {
+      numSeparators = 24;
+    }
+
+    separators = new byte[numSeparators];
+    for (int i = 0; i < numSeparators; i++) {
+      separators[i] = separatorCandidates.get(i);
+    }
+  }
+}
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
index 819913b1870880fb6a3ff707f6ac0895b9a0b5c3..99c1096dd718321fa1f70f5d762ccf74d31fcd7a 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/columnar/ColumnarSerDe.java
@@ -29,19 +29,17 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
-import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeSpec;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
-import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
-import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyObjectInspectorParametersImpl;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.SerDeParameters;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.Writable;
@@ -60,7 +58,9 @@
     serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST,
     serdeConstants.ESCAPE_CHAR,
     serdeConstants.SERIALIZATION_ENCODING,
-    LazySimpleSerDe.SERIALIZATION_EXTEND_NESTING_LEVELS})
+    SerDeParameters.SERIALIZATION_EXTEND_NESTING_LEVELS,
+    SerDeParameters.SERIALIZATION_EXTEND_ADDITIONAL_NESTING_LEVELS
+    })
 public class ColumnarSerDe extends ColumnarSerDeBase {
 
   @Override
@@ -92,7 +92,7 @@ public ColumnarSerDe() throws SerDeException {
 
   @Override
   public void initialize(Configuration conf, Properties tbl) throws SerDeException {
-    serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, getClass().getName());
+    serdeParams = new SerDeParameters(conf, tbl, getClass().getName());
 
     // Create the ObjectInspectors for the fields.
Note: Currently // ColumnarObject uses same ObjectInpector as LazyStruct diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarSerDe.java index 87952a10a9a9cf8691630a502899b642eaeafd8e..893e14fb10d7119b362c2a37d975171079ed9b3d 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarSerDe.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/columnar/LazyBinaryColumnarSerDe.java @@ -26,8 +26,7 @@ import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeSpec; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; -import org.apache.hadoop.hive.serde2.lazy.LazyUtils; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryFactory; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -63,8 +62,8 @@ public String toString() { @Override public void initialize(Configuration conf, Properties tbl) throws SerDeException { - SerDeParameters serdeParams = new SerDeParameters(); - LazyUtils.extractColumnInfo(tbl, serdeParams, getClass().getName()); + SerDeParameters serdeParams = new SerDeParameters(conf, tbl, getClass().getName()); + columnNames = serdeParams.getColumnNames(); columnTypes = serdeParams.getColumnTypes(); diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java index 7ebe18264ea5ed021fa92e638e6efbbb569e845d..864d9aabcec0b23fcd5e22c822ef2bba0e5cd01c 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java @@ -220,9 +220,9 @@ */ @Deprecated public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, - byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, + byte[] separators, int separatorIndex, Text nullSequence, boolean escaped, byte escapeChar, ObjectInspectorOptions option) throws SerDeException { - return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, + return createLazyObjectInspector(typeInfo, separators, separatorIndex, nullSequence, escaped, escapeChar, false, option); } @@ -245,9 +245,9 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, */ @Deprecated public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, - byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, + byte[] separators, int separatorIndex, Text nullSequence, boolean escaped, byte escapeChar) throws SerDeException { - return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, + return createLazyObjectInspector(typeInfo, separators, separatorIndex, nullSequence, escaped, escapeChar, false, ObjectInspectorOptions.JAVA); } @@ -267,9 +267,9 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, */ @Deprecated public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, - byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, + byte[] separators, int separatorIndex, Text nullSequence, boolean escaped, byte escapeChar, boolean extendedBooleanLiteral) throws SerDeException { - return createLazyObjectInspector(typeInfo, separator, 
separatorIndex, nullSequence, escaped, + return createLazyObjectInspector(typeInfo, separators, separatorIndex, nullSequence, escaped, escapeChar, extendedBooleanLiteral, ObjectInspectorOptions.JAVA); } @@ -289,10 +289,10 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, */ @Deprecated public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, - byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, + byte[] separators, int separatorIndex, Text nullSequence, boolean escaped, byte escapeChar, boolean extendedBooleanLiteral, ObjectInspectorOptions option) throws SerDeException { LazyObjectInspectorParametersImpl lazyParams = new LazyObjectInspectorParametersImpl( - escaped, escapeChar, extendedBooleanLiteral, null, separator, nullSequence); + escaped, escapeChar, extendedBooleanLiteral, null, separators, nullSequence); return createLazyObjectInspector(typeInfo, separatorIndex, lazyParams, option); } @@ -332,7 +332,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, .getListElementTypeInfo(), separatorIndex + 1, lazyParams, option), LazyUtils.getSeparator(lazyParams.getSeparators(), separatorIndex), lazyParams); - case STRUCT: + case STRUCT: StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; List fieldNames = structTypeInfo.getAllStructFieldNames(); List fieldTypeInfos = structTypeInfo @@ -347,7 +347,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, fieldNames, fieldObjectInspectors, null, LazyUtils.getSeparator(lazyParams.getSeparators(), separatorIndex), lazyParams, option); - case UNION: + case UNION: UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo; List lazyOIs = new ArrayList(); for (TypeInfo uti : unionTypeInfo.getAllUnionObjectTypeInfos()) { @@ -357,7 +357,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, return LazyObjectInspectorFactory.getLazyUnionObjectInspector(lazyOIs, LazyUtils.getSeparator(lazyParams.getSeparators(), separatorIndex), lazyParams); - } + } throw new RuntimeException("Hive LazySerDe Internal error."); } @@ -396,7 +396,7 @@ public static ObjectInspector createLazyStructInspector( */ @Deprecated public static ObjectInspector createLazyStructInspector( - List columnNames, List typeInfos, byte[] separators, + List columnNames, List typeInfos, byte[] separators, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, byte escapeChar, boolean extendedBooleanLiteral) throws SerDeException { LazyObjectInspectorParametersImpl lazyParams = new LazyObjectInspectorParametersImpl( diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java index d25c2ff004f259c6dc536c48629b8a4f79a938d8..f726a78de1d7569af6b7f90fcd0f3f94eb0099bc 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java @@ -19,13 +19,14 @@ package org.apache.hadoop.hive.serde2.lazy; import java.io.IOException; -import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Properties; +import org.apache.commons.lang.ArrayUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -33,8 +34,8 @@ import org.apache.hadoop.hive.serde.serdeConstants; import 
org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe; import org.apache.hadoop.hive.serde2.ByteStream; -import org.apache.hadoop.hive.serde2.SerDe; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.SerDeSpec; import org.apache.hadoop.hive.serde2.SerDeStats; import org.apache.hadoop.hive.serde2.SerDeUtils; @@ -58,6 +59,7 @@ import org.apache.hadoop.io.Writable; import org.apache.hive.common.util.HiveStringUtils; + /** * LazySimpleSerDe can be used to read the same data format as * MetadataTypedColumnsetSerDe and TCTLSeparatedProtocol. @@ -75,16 +77,15 @@ serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST, serdeConstants.ESCAPE_CHAR, serdeConstants.SERIALIZATION_ENCODING, - LazySimpleSerDe.SERIALIZATION_EXTEND_NESTING_LEVELS}) + SerDeParameters.SERIALIZATION_EXTEND_NESTING_LEVELS, + SerDeParameters.SERIALIZATION_EXTEND_ADDITIONAL_NESTING_LEVELS + }) public class LazySimpleSerDe extends AbstractEncodingAwareSerDe { public static final Log LOG = LogFactory.getLog(LazySimpleSerDe.class .getName()); - public static final String SERIALIZATION_EXTEND_NESTING_LEVELS - = "hive.serialization.extend.nesting.levels"; - - public static final byte[] DefaultSeparators = {(byte) 1, (byte) 2, (byte) 3}; + private SerDeParameters serdeParams = null; private ObjectInspector cachedObjectInspector; @@ -97,11 +98,11 @@ public String toString() { return getClass().toString() + "[" - + Arrays.asList(serdeParams.separators) + + Arrays.asList(serdeParams.getSeparators()) + ":" - + ((StructTypeInfo) serdeParams.rowTypeInfo).getAllStructFieldNames() + + ((StructTypeInfo) serdeParams.getRowTypeInfo()).getAllStructFieldNames() + ":" - + ((StructTypeInfo) serdeParams.rowTypeInfo) + + ((StructTypeInfo) serdeParams.getRowTypeInfo()) .getAllStructFieldTypeInfos() + "]"; } @@ -109,100 +110,6 @@ public LazySimpleSerDe() throws SerDeException { } /** - * Return the byte value of the number string. - * - * @param altValue - * The string containing a number. - * @param defaultVal - * If the altValue does not represent a number, return the - * defaultVal. - */ - public static byte getByte(String altValue, byte defaultVal) { - if (altValue != null && altValue.length() > 0) { - try { - return Byte.valueOf(altValue).byteValue(); - } catch (NumberFormatException e) { - return (byte) altValue.charAt(0); - } - } - return defaultVal; - } - - /** - * SerDeParameters. 
- * - */ - public static class SerDeParameters implements LazyObjectInspectorParameters { - byte[] separators = DefaultSeparators; - String nullString; - Text nullSequence; - TypeInfo rowTypeInfo; - boolean lastColumnTakesRest; - List columnNames; - List columnTypes; - - boolean escaped; - byte escapeChar; - boolean[] needsEscape; - - boolean extendedBooleanLiteral; - List timestampFormats; - - public SerDeParameters() { - } - - public List getColumnTypes() { - return columnTypes; - } - - public List getColumnNames() { - return columnNames; - } - - public byte[] getSeparators() { - return separators; - } - - public String getNullString() { - return nullString; - } - - public Text getNullSequence() { - return nullSequence; - } - - public TypeInfo getRowTypeInfo() { - return rowTypeInfo; - } - - public boolean isLastColumnTakesRest() { - return lastColumnTakesRest; - } - - public boolean isEscaped() { - return escaped; - } - - public byte getEscapeChar() { - return escapeChar; - } - - public boolean[] getNeedsEscape() { - return needsEscape; - } - - public boolean isExtendedBooleanLiteral() { - return extendedBooleanLiteral; - } - - public List getTimestampFormats() { - return timestampFormats; - } - } - - SerDeParameters serdeParams = null; - - /** * Initialize the SerDe given the parameters. serialization.format: separator * char or byte code (only supports byte-value up to 127) columns: * ","-separated column names columns.types: ",", ":", or ";"-separated column @@ -216,8 +123,7 @@ public void initialize(Configuration job, Properties tbl) super.initialize(job, tbl); - serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, getClass() - .getName()); + serdeParams = new SerDeParameters(job, tbl, getClass().getName()); // Create the ObjectInspectors for the fields cachedObjectInspector = LazyFactory.createLazyStructInspector(serdeParams @@ -227,10 +133,10 @@ public void initialize(Configuration job, Properties tbl) .createLazyObject(cachedObjectInspector); LOG.debug(getClass().getName() + " initialized with: columnNames=" - + serdeParams.columnNames + " columnTypes=" + serdeParams.columnTypes - + " separator=" + Arrays.asList(serdeParams.separators) - + " nullstring=" + serdeParams.nullString + " lastColumnTakesRest=" - + serdeParams.lastColumnTakesRest + " timestampFormats=" + serdeParams.timestampFormats); + + serdeParams.getColumnNames() + " columnTypes=" + serdeParams.getColumnTypes() + + " separator=" + Arrays.asList(serdeParams.getSeparators()) + + " nullstring=" + serdeParams.getNullString() + " lastColumnTakesRest=" + + serdeParams.isLastColumnTakesRest() + " timestampFormats=" + serdeParams.getTimestampFormats()); serializedSize = 0; stats = new SerDeStats(); @@ -238,115 +144,6 @@ public void initialize(Configuration job, Properties tbl) lastOperationDeserialize = false; } - public static SerDeParameters initSerdeParams(Configuration job, - Properties tbl, String serdeName) throws SerDeException { - SerDeParameters serdeParams = new SerDeParameters(); - // Read the separators: We use 8 levels of separators by default, - // and 24 if SERIALIZATION_EXTEND_NESTING_LEVELS is set to true - // The levels possible are the set of control chars that we can use as - // special delimiters, ie they should absent in the data or escaped. 
- // To increase this level further, we need to stop relying - // on single control chars delimiters - - serdeParams.separators = new byte[8]; - serdeParams.separators[0] = getByte(tbl.getProperty(serdeConstants.FIELD_DELIM, - tbl.getProperty(serdeConstants.SERIALIZATION_FORMAT)), DefaultSeparators[0]); - serdeParams.separators[1] = getByte(tbl - .getProperty(serdeConstants.COLLECTION_DELIM), DefaultSeparators[1]); - serdeParams.separators[2] = getByte( - tbl.getProperty(serdeConstants.MAPKEY_DELIM), DefaultSeparators[2]); - String extendedNesting = - tbl.getProperty(SERIALIZATION_EXTEND_NESTING_LEVELS); - if(extendedNesting == null || !extendedNesting.equalsIgnoreCase("true")){ - //use the default smaller set of separators for backward compatibility - for (int i = 3; i < serdeParams.separators.length; i++) { - serdeParams.separators[i] = (byte) (i + 1); - } - } - else{ - //If extended nesting is enabled, set the extended set of separator chars - - final int MAX_CTRL_CHARS = 29; - byte[] extendedSeparators = new byte[MAX_CTRL_CHARS]; - int extendedSeparatorsIdx = 0; - - //get the first 3 separators that have already been set (defaults to 1,2,3) - for(int i = 0; i < 3; i++){ - extendedSeparators[extendedSeparatorsIdx++] = serdeParams.separators[i]; - } - - for (byte asciival = 4; asciival <= MAX_CTRL_CHARS; asciival++) { - - //use only control chars that are very unlikely to be part of the string - // the following might/likely to be used in text files for strings - // 9 (horizontal tab, HT, \t, ^I) - // 10 (line feed, LF, \n, ^J), - // 12 (form feed, FF, \f, ^L), - // 13 (carriage return, CR, \r, ^M), - // 27 (escape, ESC, \e [GCC only], ^[). - - //reserving the following values for future dynamic level impl - // 30 - // 31 - - switch(asciival){ - case 9: - case 10: - case 12: - case 13: - case 27: - continue; - } - extendedSeparators[extendedSeparatorsIdx++] = asciival; - } - - serdeParams.separators = - Arrays.copyOfRange(extendedSeparators, 0, extendedSeparatorsIdx); - } - - serdeParams.nullString = tbl.getProperty( - serdeConstants.SERIALIZATION_NULL_FORMAT, "\\N"); - serdeParams.nullSequence = new Text(serdeParams.nullString); - - String lastColumnTakesRestString = tbl - .getProperty(serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST); - serdeParams.lastColumnTakesRest = (lastColumnTakesRestString != null && lastColumnTakesRestString - .equalsIgnoreCase("true")); - - LazyUtils.extractColumnInfo(tbl, serdeParams, serdeName); - - // Create the LazyObject for storing the rows - serdeParams.rowTypeInfo = TypeInfoFactory.getStructTypeInfo( - serdeParams.columnNames, serdeParams.columnTypes); - - // Get the escape information - String escapeProperty = tbl.getProperty(serdeConstants.ESCAPE_CHAR); - serdeParams.escaped = (escapeProperty != null); - if (serdeParams.escaped) { - serdeParams.escapeChar = getByte(escapeProperty, (byte) '\\'); - } - if (serdeParams.escaped) { - serdeParams.needsEscape = new boolean[128]; - for (int i = 0; i < 128; i++) { - serdeParams.needsEscape[i] = false; - } - serdeParams.needsEscape[serdeParams.escapeChar] = true; - for (int i = 0; i < serdeParams.separators.length; i++) { - serdeParams.needsEscape[serdeParams.separators[i]] = true; - } - } - - serdeParams.extendedBooleanLiteral = job == null ? 
false : - job.getBoolean(ConfVars.HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL.varname, false); - - String[] timestampFormatsArray = - HiveStringUtils.splitAndUnEscape(tbl.getProperty(serdeConstants.TIMESTAMP_FORMATS)); - if (timestampFormatsArray != null) { - serdeParams.timestampFormats = Arrays.asList(timestampFormatsArray); - } - return serdeParams; - } - // The object for storing row data LazyStruct cachedLazyStruct; @@ -420,7 +217,7 @@ public Writable doSerialize(Object obj, ObjectInspector objInspector) StructObjectInspector soi = (StructObjectInspector) objInspector; List<? extends StructField> fields = soi.getAllStructFieldRefs(); List<Object> list = soi.getStructFieldsDataAsList(obj); - List<? extends StructField> declaredFields = (serdeParams.rowTypeInfo != null && ((StructTypeInfo) serdeParams.rowTypeInfo) + List<? extends StructField> declaredFields = (serdeParams.getRowTypeInfo() != null && ((StructTypeInfo) serdeParams.getRowTypeInfo()) .getAllStructFieldNames().size() > 0) ? ((StructObjectInspector) getObjectInspector()) .getAllStructFieldRefs() : null; @@ -432,7 +229,7 @@ public Writable doSerialize(Object obj, ObjectInspector objInspector) for (int i = 0; i < fields.size(); i++) { // Append the separator if needed. if (i > 0) { - serializeStream.write(serdeParams.separators[0]); + serializeStream.write(serdeParams.getSeparators()[0]); } // Get the field objectInspector and the field object. ObjectInspector foi = fields.get(i).getFieldObjectInspector(); @@ -441,7 +238,7 @@ public Writable doSerialize(Object obj, ObjectInspector objInspector) if (declaredFields != null && i >= declaredFields.size()) { throw new SerDeException("Error: expecting " + declaredFields.size() + " but asking for field " + i + "\n" + "data=" + obj + "\n" - + "tableType=" + serdeParams.rowTypeInfo.toString() + "\n" + + "tableType=" + serdeParams.getRowTypeInfo().toString() + "\n" + "dataType=" + TypeInfoUtils.getTypeInfoFromObjectInspector(objInspector)); } @@ -462,8 +259,8 @@ public Writable doSerialize(Object obj, ObjectInspector objInspector) protected void serializeField(ByteStream.Output out, Object obj, ObjectInspector objInspector, SerDeParameters serdeParams) throws SerDeException { try { - serialize(out, obj, objInspector, serdeParams.separators, 1, serdeParams.nullSequence, - serdeParams.escaped, serdeParams.escapeChar, serdeParams.needsEscape); + serialize(out, obj, objInspector, serdeParams.getSeparators(), 1, serdeParams.getNullSequence(), + serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams.getNeedsEscape()); } catch (IOException e) { throw new SerDeException(e); } @@ -489,15 +286,13 @@ protected void serializeField(ByteStream.Output out, Object obj, ObjectInspector * @param escapeChar * Which char to use as the escape char, e.g. '\\' * @param needsEscape - * Which chars needs to be escaped. This array should have size of - * 128. Negative byte values (or byte values >= 128) are never - * escaped. + * Which bytes need to be escaped.
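// The hunks above and below retype needsEscape from a boolean[] of size 128 (which,
// as the removed Javadoc notes, could never flag negative byte values) to a
// java.util.Map, so any byte can be marked for escaping. A hedged sketch of how such
// a table might be built; the Map<Byte, Boolean> element types are an assumption
// inferred from the containsKey/get calls in the LazyUtils hunk further down.
import java.util.HashMap;
import java.util.Map;

class NeedsEscapeSketch {
  static Map<Byte, Boolean> buildNeedsEscape(byte escapeChar, byte[] separators) {
    Map<Byte, Boolean> needsEscape = new HashMap<>();
    needsEscape.put(escapeChar, Boolean.TRUE); // the escape char itself must be escaped
    for (byte sep : separators) {
      needsEscape.put(sep, Boolean.TRUE); // as must every active separator
    }
    needsEscape.put((byte) 0xFE, Boolean.TRUE); // now expressible: a byte outside 0..127
    return needsEscape;
  }
}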
* @throws IOException * @throws SerDeException */ public static void serialize(ByteStream.Output out, Object obj, ObjectInspector objInspector, byte[] separators, int level, - Text nullSequence, boolean escaped, byte escapeChar, boolean[] needsEscape) + Text nullSequence, boolean escaped, byte escapeChar, Map<Byte, Boolean> needsEscape) throws IOException, SerDeException { if (obj == null) { diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java index 3943508685158349f3ff68cfe0543628c613047e..ea6df823b490629395a41664b74fd40dac4aebb5 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java @@ -23,16 +23,13 @@ import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; -import java.util.ArrayList; import java.util.Arrays; -import java.util.Properties; +import java.util.Map; import org.apache.commons.codec.binary.Base64; -import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; @@ -48,7 +45,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; @@ -142,16 +138,15 @@ public static String convertToString(byte[] bytes, int start, int length) { * @param escapeChar * if escaped, the char for prefixing special characters. * @param needsEscape - * if escaped, whether a specific character needs escaping. This - * array should have size of 128. + * if escaped, whether a specific character needs escaping. */ public static void writeEscaped(OutputStream out, byte[] bytes, int start, - int len, boolean escaped, byte escapeChar, boolean[] needsEscape) + int len, boolean escaped, byte escapeChar, Map<Byte, Boolean> needsEscape) throws IOException { if (escaped) { int end = start + len; for (int i = start; i <= end; i++) { - if (i == end || (bytes[i] >= 0 && needsEscape[bytes[i]])) { + if (i == end || (needsEscape.containsKey(bytes[i]) && needsEscape.get(bytes[i]))) { if (i > start) { out.write(bytes, start, i - start); } @@ -176,12 +171,11 @@ public static void writeEscaped(OutputStream out, byte[] bytes, int start, * @param o * The primitive Object * @param needsEscape - * Whether a character needs escaping. This array should have size of - * 128. + * Whether a character needs escaping.
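// The writeEscaped hunk above keeps the original chunked-copy shape: scan for a byte
// flagged in needsEscape, flush the clean run before it, emit the escape char, and let
// the flagged byte open the next run. A standalone illustration of that loop under the
// same assumed Map<Byte, Boolean> shape; this shows the algorithm, not the Hive method.
import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

class EscapeLoopSketch {
  static byte[] escape(byte[] bytes, byte escapeChar, Map<Byte, Boolean> needsEscape) {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    int start = 0;
    for (int i = 0; i <= bytes.length; i++) {
      boolean flagged = i < bytes.length
          && needsEscape.containsKey(bytes[i]) && needsEscape.get(bytes[i]);
      if (i == bytes.length || flagged) {
        out.write(bytes, start, i - start); // flush the pending unescaped run
        start = i; // a flagged byte starts the next run...
        if (flagged) {
          out.write(escapeChar); // ...prefixed by the escape char
        }
      }
    }
    return out.toByteArray();
  }

  public static void main(String[] args) {
    Map<Byte, Boolean> needsEscape = new HashMap<>();
    needsEscape.put((byte) 1, true); // Ctrl-A, the level-1 field separator
    byte[] escaped = escape("a\u0001b".getBytes(StandardCharsets.UTF_8),
        (byte) '\\', needsEscape);
    // escaped now holds: 'a', '\\', 0x01, 'b'
  }
}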
*/ public static void writePrimitiveUTF8(OutputStream out, Object o, PrimitiveObjectInspector oi, boolean escaped, byte escapeChar, - boolean[] needsEscape) throws IOException { + Map<Byte, Boolean> needsEscape) throws IOException { switch (oi.getPrimitiveCategory()) { case BOOLEAN: { @@ -341,42 +335,7 @@ public static int hashBytes(byte[] data, int start, int len) { return hash; } - public static void extractColumnInfo(Properties tbl, SerDeParameters serdeParams, - String serdeName) throws SerDeException { - // Read the configuration parameters - String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS); - // NOTE: if "columns.types" is missing, all columns will be of String type - String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES); - // Parse the configuration parameters - - if (columnNameProperty != null && columnNameProperty.length() > 0) { - serdeParams.columnNames = Arrays.asList(columnNameProperty.split(",")); - } else { - serdeParams.columnNames = new ArrayList<String>(); - } - if (columnTypeProperty == null) { - // Default type: all string - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < serdeParams.columnNames.size(); i++) { - if (i > 0) { - sb.append(":"); - } - sb.append(serdeConstants.STRING_TYPE_NAME); - } - columnTypeProperty = sb.toString(); - } - - serdeParams.columnTypes = TypeInfoUtils - .getTypeInfosFromTypeString(columnTypeProperty); - - if (serdeParams.columnNames.size() != serdeParams.columnTypes.size()) { - throw new SerDeException(serdeName + ": columns has " - + serdeParams.columnNames.size() - + " elements while columns.types has " - + serdeParams.columnTypes.size() + " elements!"); - } - } /** * gets a byte[] with copy of data from source BytesWritable @@ -404,10 +363,7 @@ static byte getSeparator(byte[] separators, int level) throws SerDeException { String msg = "Number of levels of nesting supported for " + "LazySimpleSerde is " + (separators.length - 1) + " Unable to work with level " + level; - if(separators.length < 9){ - msg += ". Use " + LazySimpleSerDe.SERIALIZATION_EXTEND_NESTING_LEVELS + - " serde property for tables using LazySimpleSerde."; - } + throw new SerDeException(msg, e); } } @@ -450,6 +406,26 @@ public static void copyAndEscapeStringDataToText(byte[] inputBytes, int start, i } } + /** + * Return the byte value of the number string. + * + * @param altValue + * The string containing a number. + * @param defaultVal + * If the altValue does not represent a number, return the + * defaultVal.
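// getByte, relocated into LazyUtils here, first tries to parse the whole property
// value as a number and only falls back to the first character's own byte value on
// NumberFormatException. Hypothetical calls showing the three outcomes:
import org.apache.hadoop.hive.serde2.lazy.LazyUtils;

class GetByteExamples {
  void examples() {
    byte tab = LazyUtils.getByte("9", (byte) 1); // -> 9 ('\t'): "9" parses as a number
    byte pipe = LazyUtils.getByte("|", (byte) 1); // -> (byte) '|': parse fails, first char wins
    byte dflt = LazyUtils.getByte(null, (byte) 1); // -> 1 (Ctrl-A), the supplied default
  }
}
// This is why a test below can set serialization.format to the string "9" and get
// tab-delimited fields.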
+ */ + public static byte getByte(String altValue, byte defaultVal) { + if (altValue != null && altValue.length() > 0) { + try { + return Byte.valueOf(altValue).byteValue(); + } catch (NumberFormatException e) { + return (byte) altValue.charAt(0); + } + } + return defaultVal; + } + private LazyUtils() { // prevent instantiation } diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java index 48f3b029c00a68b90857a95765d2468a03844561..3be99d059b340ad9b9e49fad35c7b51078ea02a3 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java @@ -30,6 +30,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryFactory; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap; @@ -638,7 +639,7 @@ public void testLazyStructNested() throws Throwable { private void testNestedinArrayAtLevelExtended(int nestingLevel, ObjectInspector.Category dtype) throws SerDeException { Properties tableProp = new Properties(); - tableProp.setProperty(LazySimpleSerDe.SERIALIZATION_EXTEND_NESTING_LEVELS, "true"); + tableProp.setProperty(SerDeParameters.SERIALIZATION_EXTEND_NESTING_LEVELS, "true"); testNestedinArrayAtLevel(nestingLevel, dtype, tableProp); } @@ -693,9 +694,10 @@ private void testNestedinArrayAtLevel(int nestingLevel, tableProp.setProperty("columns", "narray"); tableProp.setProperty("columns.types", schema.toString()); SerDeUtils.initializeSerDe(serDe, conf, tableProp, null); - + SerDeParameters serdeParams = new SerDeParameters(conf, tableProp, LazySimpleSerDe.class.getName()); + //create the serialized string for type - byte[] separators = serDe.serdeParams.getSeparators(); + byte[] separators = serdeParams.getSeparators(); System.err.println("Using separator " + (char)separators[nestingLevel]); byte [] serializedRow = null; switch(dtype){ diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java index cf86973270ebc76249f39b3f7253b0981a45c0ba..1617b87d90bb77eaa03b9513a7537fdc4c71d8d4 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.serde2.lazy; +import java.io.IOException; +import java.util.ArrayList; import java.util.List; import java.util.Properties; @@ -25,13 +27,21 @@ import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.ByteStream; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.SimpleMapEqualComparer; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; +import org.apache.hadoop.hive.serde2.objectinspector.TestSimpleMapEqualComparer.TextStringMapHolder; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; @@ -79,41 +89,7 @@ public void testLazySimpleSerDe() throws Throwable { } } - private void deserializeAndSerialize(LazySimpleSerDe serDe, Text t, String s, - Object[] expectedFieldsData) throws SerDeException { - // Get the row structure - StructObjectInspector oi = (StructObjectInspector) serDe - .getObjectInspector(); - List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs(); - assertEquals(expectedFieldsData.length, fieldRefs.size()); - // Deserialize - Object row = serDe.deserialize(t); - for (int i = 0; i < fieldRefs.size(); i++) { - Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i)); - if (fieldData != null) { - fieldData = ((LazyPrimitive) fieldData).getWritableObject(); - } - assertEquals("Field " + i, expectedFieldsData[i], fieldData); - } - // Serialize - assertEquals(Text.class, serDe.getSerializedClass()); - Text serializedText = (Text) serDe.serialize(row, oi); - assertEquals("Serialized data", s, serializedText.toString()); - } - - private Properties createProperties() { - Properties tbl = new Properties(); - - // Set the configuration parameters - tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); - tbl.setProperty("columns", - "abyte,ashort,aint,along,adouble,astring,anullint,anullstring"); - tbl.setProperty("columns.types", - "tinyint:smallint:int:bigint:double:string:int:string"); - tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); - return tbl; - } /** * Test the LazySimpleSerDe class with LastColumnTakesRest option.
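// With initSerdeParams gone, tests obtain the parameter object by constructing it
// directly, as the TestLazyArrayMapStruct hunk above now does. A minimal sketch of
// that pattern, assuming the new (Configuration, Properties, String) constructor
// shown throughout this patch; the table layout is illustrative.
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.SerDeParameters;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

class ParamsInTestSketch {
  static SerDeParameters exampleParams() throws Exception {
    Properties tbl = new Properties();
    tbl.setProperty("columns", "id,name");
    tbl.setProperty("columns.types", "int:string");
    return new SerDeParameters(new Configuration(), tbl, LazySimpleSerDe.class.getName());
  }
}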
@@ -199,5 +175,53 @@ public void testLazySimpleSerDeMissingColumns() throws Throwable { throw e; } } + + Object serializeAndDeserialize(List<Object> o1, StructObjectInspector oi1, + LazySimpleSerDe serde, + SerDeParameters serdeParams) throws IOException, SerDeException { + ByteStream.Output serializeStream = new ByteStream.Output(); + LazySimpleSerDe.serialize(serializeStream, o1, oi1, serdeParams + .getSeparators(), 0, serdeParams.getNullSequence(), serdeParams + .isEscaped(), serdeParams.getEscapeChar(), serdeParams + .getNeedsEscape()); + Text t = new Text(serializeStream.toByteArray()); + return serde.deserialize(t); + } + + + private void deserializeAndSerialize(LazySimpleSerDe serDe, Text t, String s, + Object[] expectedFieldsData) throws SerDeException { + // Get the row structure + StructObjectInspector oi = (StructObjectInspector) serDe + .getObjectInspector(); + List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs(); + assertEquals(expectedFieldsData.length, fieldRefs.size()); + + // Deserialize + Object row = serDe.deserialize(t); + for (int i = 0; i < fieldRefs.size(); i++) { + Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i)); + if (fieldData != null) { + fieldData = ((LazyPrimitive) fieldData).getWritableObject(); + } + assertEquals("Field " + i, expectedFieldsData[i], fieldData); + } + // Serialize + assertEquals(Text.class, serDe.getSerializedClass()); + Text serializedText = (Text) serDe.serialize(row, oi); + assertEquals("Serialized data", s, serializedText.toString()); + } + private Properties createProperties() { + Properties tbl = new Properties(); + + // Set the configuration parameters + tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9"); + tbl.setProperty("columns", + "abyte,ashort,aint,along,adouble,astring,anullint,anullstring"); + tbl.setProperty("columns.types", + "tinyint:smallint:int:bigint:double:string:int:string"); + tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL"); + return tbl; + } } diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestCrossMapEqualComparer.java b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestCrossMapEqualComparer.java index c58c4270a84d4c7c9fc61878d8bf6b1399e36152..48d5ea5a68ed0ce008520db057f63cdb0f617ee7 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestCrossMapEqualComparer.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestCrossMapEqualComparer.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.io.Text; @@ -99,8 +99,7 @@ public void testCompatibleType() throws SerDeException, IOException { Properties tbl = new Properties(); tbl.setProperty(serdeConstants.LIST_COLUMNS, ObjectInspectorUtils.getFieldNames(oi1)); tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi1)); - SerDeParameters serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, - LazySimpleSerDe.class.getName()); + SerDeParameters serdeParams = new SerDeParameters(conf, tbl, LazySimpleSerDe.class.getName()); SerDeUtils.initializeSerDe(serde, conf, tbl, null); ObjectInspector oi2 = serde.getObjectInspector(); @@ -153,8 +152,7 @@ public void
testIncompatibleType() throws SerDeException, IOException { Properties tbl = new Properties(); tbl.setProperty(serdeConstants.LIST_COLUMNS, ObjectInspectorUtils.getFieldNames(oi1)); tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi1)); - SerDeParameters serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, - LazySimpleSerDe.class.getName()); + SerDeParameters serdeParams = new SerDeParameters(conf, tbl, LazySimpleSerDe.class.getName()); SerDeUtils.initializeSerDe(serde, conf, tbl, null); ObjectInspector oi2 = serde.getObjectInspector(); diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestSimpleMapEqualComparer.java b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestSimpleMapEqualComparer.java index 6f09b8328dd5d9cfc2dc5c18a760a36b4d499f61..9b2b29ef8e4b746e7c1b14d588e4be549be8d7be 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestSimpleMapEqualComparer.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/objectinspector/TestSimpleMapEqualComparer.java @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters; +import org.apache.hadoop.hive.serde2.SerDeParameters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; import org.apache.hadoop.io.Text; @@ -99,8 +99,7 @@ public void testCompatibleType() throws SerDeException, IOException { Properties tbl = new Properties(); tbl.setProperty(serdeConstants.LIST_COLUMNS, ObjectInspectorUtils.getFieldNames(oi1)); tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi1)); - SerDeParameters serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, - LazySimpleSerDe.class.getName()); + SerDeParameters serdeParams = new SerDeParameters(conf, tbl, LazySimpleSerDe.class.getName()); SerDeUtils.initializeSerDe(serde, conf, tbl, null); ObjectInspector oi2 = serde.getObjectInspector(); @@ -153,8 +152,7 @@ public void testIncompatibleType() throws SerDeException, IOException { Properties tbl = new Properties(); tbl.setProperty(serdeConstants.LIST_COLUMNS, ObjectInspectorUtils.getFieldNames(oi1)); tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi1)); - SerDeParameters serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, - LazySimpleSerDe.class.getName()); + SerDeParameters serdeParams = new SerDeParameters(conf, tbl, LazySimpleSerDe.class.getName()); SerDeUtils.initializeSerDe(serde, conf, tbl, null); ObjectInspector oi2 = serde.getObjectInspector();
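// Pulling the pieces together: the serializeAndDeserialize helper added to
// TestLazySimpleSerDe above drives the static LazySimpleSerDe.serialize(...) with
// values read from a SerDeParameters instance, then feeds the bytes back through
// deserialize. A condensed sketch of that round trip under the same assumptions
// (column layout illustrative, error handling elided):
import java.util.List;
import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.serde2.SerDeParameters;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Text;

class RoundTripSketch {
  static Object roundTrip(List<Object> row, StructObjectInspector rowOI) throws Exception {
    Configuration conf = new Configuration();
    Properties tbl = new Properties();
    tbl.setProperty("columns", "id,name");
    tbl.setProperty("columns.types", "int:string");

    LazySimpleSerDe serde = new LazySimpleSerDe();
    SerDeUtils.initializeSerDe(serde, conf, tbl, null);
    SerDeParameters params = new SerDeParameters(conf, tbl, LazySimpleSerDe.class.getName());

    // Serialize the way the new test helper does, starting at nesting level 0:
    ByteStream.Output out = new ByteStream.Output();
    LazySimpleSerDe.serialize(out, row, rowOI, params.getSeparators(), 0,
        params.getNullSequence(), params.isEscaped(), params.getEscapeChar(),
        params.getNeedsEscape());

    // ...and read the bytes back through the SerDe.
    return serde.deserialize(new Text(out.toByteArray()));
  }
}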