diff --git accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java index 46c3c1a..3113935 100644 --- accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java +++ accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloRowSerializer.java @@ -97,7 +97,6 @@ public Mutation serialize(Object obj, ObjectInspector objInspector) throws SerDe Object value = columnValues.get(rowIdOffset); // The ObjectInspector for the row ID - ObjectInspector fieldObjectInspector = field.getFieldObjectInspector(); // Serialize the row component using the RowIdFactory. In the normal case, this will just // delegate back to the "local" serializeRowId method @@ -122,7 +121,7 @@ public Mutation serialize(Object obj, ObjectInspector objInspector) throws SerDe } // The ObjectInspector for the current column - fieldObjectInspector = field.getFieldObjectInspector(); + ObjectInspector fieldObjectInspector = field.getFieldObjectInspector(); // Make sure we got the right implementation of a ColumnMapping ColumnMapping mapping = mappings.get(i); @@ -273,7 +272,7 @@ protected void writeWithLevel(ObjectInspector oi, Object value, ByteStream.Outpu } return; case LIST: - char separator = (char) serDeParams.getSeparators()[level]; + byte[] separator = serDeParams.getSeparators()[level]; ListObjectInspector loi = (ListObjectInspector) oi; List list = loi.getList(value); ObjectInspector eoi = loi.getListElementObjectInspector(); @@ -290,8 +289,8 @@ protected void writeWithLevel(ObjectInspector oi, Object value, ByteStream.Outpu } return; case MAP: - char sep = (char) serDeParams.getSeparators()[level]; - char keyValueSeparator = (char) serDeParams.getSeparators()[level + 1]; + byte[] sep = serDeParams.getSeparators()[level]; + byte[] keyValueSeparator = serDeParams.getSeparators()[level + 1]; MapObjectInspector moi = (MapObjectInspector) oi; ObjectInspector koi = moi.getMapKeyObjectInspector(); ObjectInspector voi = moi.getMapValueObjectInspector(); @@ -315,7 +314,7 @@ protected void writeWithLevel(ObjectInspector oi, Object value, ByteStream.Outpu } return; case STRUCT: - sep = (char) serDeParams.getSeparators()[level]; + sep = serDeParams.getSeparators()[level]; StructObjectInspector soi = (StructObjectInspector) oi; List fields = soi.getAllStructFieldRefs(); list = soi.getStructFieldsDataAsList(value); diff --git accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestLazyAccumuloMap.java accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestLazyAccumuloMap.java index 2479fb4..473b1c7 100644 --- accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestLazyAccumuloMap.java +++ accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/TestLazyAccumuloMap.java @@ -40,6 +40,8 @@ */ public class TestLazyAccumuloMap { + private final byte[][] separator = {{(byte) 1}, {(byte) 2}}; + protected byte[] toBytes(int i) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream out = new DataOutputStream(baos); @@ -65,7 +67,7 @@ public void testStringMapWithProjection() throws SerDeException { // Map of Integer to String Text nullSequence = new Text("\\N"); ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils - .getTypeInfosFromTypeString("map").get(0), new byte[] {(byte) 1, (byte) 2}, + .getTypeInfosFromTypeString("map").get(0), separator, 0, nullSequence, false, (byte) 0); LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi); @@ -97,7 +99,7 @@ public void testIntMap() throws SerDeException, IOException { // Map of Integer to Integer Text nullSequence = new Text("\\N"); ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils - .getTypeInfosFromTypeString("map").get(0), new byte[] {(byte) 1, (byte) 2}, 0, + .getTypeInfosFromTypeString("map").get(0), separator, 0, nullSequence, false, (byte) 0); LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi); @@ -133,7 +135,7 @@ public void testBinaryIntMap() throws SerDeException, IOException { // Map of Integer to String Text nullSequence = new Text("\\N"); ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils - .getTypeInfosFromTypeString("map").get(0), new byte[] {(byte) 1, (byte) 2}, 0, + .getTypeInfosFromTypeString("map").get(0), separator, 0, nullSequence, false, (byte) 0); LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi); @@ -169,7 +171,7 @@ public void testMixedSerializationMap() throws SerDeException, IOException { // Map of Integer to String Text nullSequence = new Text("\\N"); ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils - .getTypeInfosFromTypeString("map").get(0), new byte[] {(byte) 1, (byte) 2}, 0, + .getTypeInfosFromTypeString("map").get(0), separator, 0, nullSequence, false, (byte) 0); LazyAccumuloMap map = new LazyAccumuloMap((LazyMapObjectInspector) oi); diff --git accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTableOutputFormat.java accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTableOutputFormat.java index 706b26e..9cca966 100644 --- accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTableOutputFormat.java +++ accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/mr/TestHiveAccumuloTableOutputFormat.java @@ -340,16 +340,16 @@ public void testWriteMap() throws Exception { TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME); LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory - .createLazyObjectInspector(stringTypeInfo, new byte[] {0}, 0, + .createLazyObjectInspector(stringTypeInfo, new byte[][] {{0}}, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar()); LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector( stringOI, stringOI, (byte) ',', (byte) ':', serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar()); - LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory + LazySimpleStructObjectInspector structOI = LazyObjectInspectorFactory .getLazySimpleStructObjectInspector(Arrays.asList("row", "data"), - Arrays.asList(stringOI, mapOI), (byte) ' ', serDeParams.getNullSequence(), + Arrays.asList(stringOI, mapOI), new byte[] {' '}, serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar()); diff --git accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/DelimitedAccumuloRowIdFactory.java accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/DelimitedAccumuloRowIdFactory.java index 4bb5419..84aa327 100644 --- accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/DelimitedAccumuloRowIdFactory.java +++ accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/DelimitedAccumuloRowIdFactory.java @@ -63,7 +63,7 @@ public void init(AccumuloSerDeParameters accumuloSerDeParams, Properties propert @Override public ObjectInspector createRowIdObjectInspector(TypeInfo type) throws SerDeException { - return LazyFactory.createLazyObjectInspector(type, new byte[] {separator}, 0, + return LazyFactory.createLazyObjectInspector(type, new byte[][] {{separator}}, 0, serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar()); } diff --git accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/FirstCharAccumuloCompositeRowId.java accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/FirstCharAccumuloCompositeRowId.java index f835a96..301345b 100644 --- accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/FirstCharAccumuloCompositeRowId.java +++ accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/FirstCharAccumuloCompositeRowId.java @@ -48,7 +48,7 @@ public Object getField(int fieldID) { // The separator for the hive row would be using \x02, so the separator for this struct would be // \x02 + 1 = \x03 - char separator = (char) ((int) oi.getSeparator() + 1); + char separator = (char) ((int) oi.getSeparator()[0] + 1); // todo log.info("Separator: " + String.format("%04x", (int) separator)); diff --git accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloRowSerializer.java accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloRowSerializer.java index f613a58..0fffa7d 100644 --- accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloRowSerializer.java +++ accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloRowSerializer.java @@ -255,7 +255,7 @@ public void testMapSerialization() throws IOException, SerDeException { TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME); LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory - .createLazyObjectInspector(stringTypeInfo, new byte[] {0}, 0, + .createLazyObjectInspector(stringTypeInfo, new byte[][] {{0}}, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar()); LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector( @@ -263,7 +263,7 @@ public void testMapSerialization() throws IOException, SerDeException { serDeParams.isEscaped(), serDeParams.getEscapeChar()); LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory - .getLazySimpleStructObjectInspector(columns, Arrays.asList(stringOI, mapOI), (byte) ' ', + .getLazySimpleStructObjectInspector(columns, Arrays.asList(stringOI, mapOI), new byte[]{' '}, serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar()); diff --git accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloSerDe.java accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloSerDe.java index 18b84e4..d0c0253 100644 --- accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloSerDe.java +++ accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestAccumuloSerDe.java @@ -391,26 +391,26 @@ public void testStructOfMapSerialization() throws IOException, SerDeException { tableProperties, AccumuloSerDe.class.getSimpleName()); SerDeParameters serDeParams = accumuloSerDeParams.getSerDeParameters(); - byte[] seps = serDeParams.getSeparators(); + byte[][] seps = serDeParams.getSeparators(); // struct_map>> TypeInfo stringTypeInfo = TypeInfoFactory.getPrimitiveTypeInfo(serdeConstants.STRING_TYPE_NAME); LazyStringObjectInspector stringOI = (LazyStringObjectInspector) LazyFactory - .createLazyObjectInspector(stringTypeInfo, new byte[] {0}, 0, + .createLazyObjectInspector(stringTypeInfo, new byte[][] {{0}}, 0, serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar()); LazyMapObjectInspector mapOI = LazyObjectInspectorFactory.getLazySimpleMapObjectInspector( - stringOI, stringOI, seps[3], seps[4], serDeParams.getNullSequence(), + stringOI, stringOI, seps[3][0], seps[4][0], serDeParams.getNullSequence(), serDeParams.isEscaped(), serDeParams.getEscapeChar()); - LazySimpleStructObjectInspector rowStructOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory + LazySimpleStructObjectInspector rowStructOI = LazyObjectInspectorFactory .getLazySimpleStructObjectInspector(structColNames, - Arrays. asList(mapOI, mapOI), (byte) seps[2], + Arrays. asList(mapOI, mapOI), seps[2], serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar()); - LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) LazyObjectInspectorFactory + LazySimpleStructObjectInspector structOI = LazyObjectInspectorFactory .getLazySimpleStructObjectInspector(columns, Arrays.asList(rowStructOI, stringOI), seps[1], serDeParams.getNullSequence(), serDeParams.isLastColumnTakesRest(), serDeParams.isEscaped(), serDeParams.getEscapeChar()); diff --git accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestDefaultAccumuloRowIdFactory.java accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestDefaultAccumuloRowIdFactory.java index d464740..569a3af 100644 --- accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestDefaultAccumuloRowIdFactory.java +++ accumulo-handler/src/test/org/apache/hadoop/hive/accumulo/serde/TestDefaultAccumuloRowIdFactory.java @@ -95,7 +95,7 @@ public void testCorrectComplexInspectors() throws SerDeException { Assert.assertEquals(LazyMapObjectInspector.class, OIs.get(1).getClass()); LazySimpleStructObjectInspector structOI = (LazySimpleStructObjectInspector) OIs.get(0); - Assert.assertEquals(2, (int) structOI.getSeparator()); + Assert.assertEquals(2, (int) structOI.getSeparator()[0]); LazyMapObjectInspector mapOI = (LazyMapObjectInspector) OIs.get(1); Assert.assertEquals(2, (int) mapOI.getItemSeparator()); diff --git common/src/java/org/apache/hadoop/hive/common/io/NonSyncByteArrayOutputStream.java common/src/java/org/apache/hadoop/hive/common/io/NonSyncByteArrayOutputStream.java index a9f5ed6..c19fe41 100644 --- common/src/java/org/apache/hadoop/hive/common/io/NonSyncByteArrayOutputStream.java +++ common/src/java/org/apache/hadoop/hive/common/io/NonSyncByteArrayOutputStream.java @@ -67,6 +67,11 @@ public void write(int b) { count += 1; } + @Override + public void write(byte[] b) { + write(b, 0, b.length); + } + private int enLargeBuffer(int increment) { int temp = count + increment; int newLen = temp; diff --git contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java deleted file mode 100644 index 72f4234..0000000 --- contrib/src/java/org/apache/hadoop/hive/contrib/serde2/MultiDelimitSerDe.java +++ /dev/null @@ -1,295 +0,0 @@ -/** - * Copyright 2010 The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.contrib.serde2; - -import java.io.IOException; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.regex.Pattern; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.serde.serdeConstants; -import org.apache.hadoop.hive.serde2.*; -import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; -import org.apache.hadoop.hive.serde2.lazy.LazyFactory; -import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe; -import org.apache.hadoop.hive.serde2.lazy.LazyStruct; -import org.apache.hadoop.hive.serde2.lazy.LazyUtils; -import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.objectinspector.StructField; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.Writable; - -/** - * This SerDe allows user to use multiple characters as the field delimiter for a table. - * To use this SerDe, user has to specify field.delim in SERDEPROPERTIES. - * If the table contains a column which is a collection or map, user can optionally - * specify collection.delim or mapkey.delim as well. - * Currently field.delim can be multiple character while collection.delim - * and mapkey.delim should be just single character. - */ -@SerDeSpec(schemaProps = { - serdeConstants.LIST_COLUMNS, serdeConstants.LIST_COLUMN_TYPES, - serdeConstants.FIELD_DELIM, serdeConstants.COLLECTION_DELIM, serdeConstants.MAPKEY_DELIM, - serdeConstants.SERIALIZATION_FORMAT, serdeConstants.SERIALIZATION_NULL_FORMAT, - serdeConstants.SERIALIZATION_LAST_COLUMN_TAKES_REST, - serdeConstants.ESCAPE_CHAR, - serdeConstants.SERIALIZATION_ENCODING, - LazySimpleSerDe.SERIALIZATION_EXTEND_NESTING_LEVELS}) -public class MultiDelimitSerDe extends AbstractSerDe { - private static final Log LOG = LogFactory.getLog(MultiDelimitSerDe.class.getName()); - private static final byte[] DEFAULT_SEPARATORS = {(byte) 1, (byte) 2, (byte) 3}; - // Due to HIVE-6404, define our own constant - private static final String COLLECTION_DELIM = "collection.delim"; - - private int numColumns; - private String fieldDelimited; - // we don't support using multiple chars as delimiters within complex types - // collection separator - private byte collSep; - // map key separator - private byte keySep; - - // The object for storing row data - private LazyStruct cachedLazyStruct; - //the lazy struct object inspector - private ObjectInspector cachedObjectInspector; - - // The wrapper for byte array - private ByteArrayRef byteArrayRef; - - private LazySimpleSerDe.SerDeParameters serdeParams = null; - // The output stream of serialized objects - private final ByteStream.Output serializeStream = new ByteStream.Output(); - // The Writable to return in serialize - private final Text serializeCache = new Text(); - - @Override - public void initialize(Configuration conf, Properties tbl) throws SerDeException { - // get the SerDe parameters - serdeParams = LazySimpleSerDe.initSerdeParams(conf, tbl, getClass().getName()); - - fieldDelimited = tbl.getProperty(serdeConstants.FIELD_DELIM); - if (fieldDelimited == null || fieldDelimited.isEmpty()) { - throw new SerDeException("This table does not have serde property \"field.delim\"!"); - } - - // get the collection separator and map key separator - // TODO: use serdeConstants.COLLECTION_DELIM when the typo is fixed - collSep = LazySimpleSerDe.getByte(tbl.getProperty(COLLECTION_DELIM), - DEFAULT_SEPARATORS[1]); - keySep = LazySimpleSerDe.getByte(tbl.getProperty(serdeConstants.MAPKEY_DELIM), - DEFAULT_SEPARATORS[2]); - serdeParams.getSeparators()[1] = collSep; - serdeParams.getSeparators()[2] = keySep; - - // Create the ObjectInspectors for the fields - cachedObjectInspector = LazyFactory.createLazyStructInspector(serdeParams - .getColumnNames(), serdeParams.getColumnTypes(), serdeParams - .getSeparators(), serdeParams.getNullSequence(), serdeParams - .isLastColumnTakesRest(), serdeParams.isEscaped(), serdeParams - .getEscapeChar()); - - cachedLazyStruct = (LazyStruct) LazyFactory.createLazyObject(cachedObjectInspector); - - assert serdeParams.getColumnNames().size() == serdeParams.getColumnTypes().size(); - numColumns = serdeParams.getColumnNames().size(); - } - - - @Override - public ObjectInspector getObjectInspector() throws SerDeException { - return cachedObjectInspector; - } - - @Override - public Class getSerializedClass() { - return Text.class; - } - - @Override - public Object deserialize(Writable blob) throws SerDeException { - if (byteArrayRef == null) { - byteArrayRef = new ByteArrayRef(); - } - - // we use the default field delimiter('\1') to replace the multiple-char field delimiter - // but we cannot use it to parse the row since column data can contain '\1' as well - String rowStr; - if (blob instanceof BytesWritable) { - BytesWritable b = (BytesWritable) blob; - rowStr = new String(b.getBytes()); - } else if (blob instanceof Text) { - Text rowText = (Text) blob; - rowStr = rowText.toString(); - } else { - throw new SerDeException(getClass() + ": expects either BytesWritable or Text object!"); - } - byteArrayRef.setData(rowStr.replaceAll(Pattern.quote(fieldDelimited), "\1").getBytes()); - cachedLazyStruct.init(byteArrayRef, 0, byteArrayRef.getData().length); - // use the multi-char delimiter to parse the lazy struct - cachedLazyStruct.parseMultiDelimit(rowStr.getBytes(), fieldDelimited.getBytes()); - return cachedLazyStruct; - } - - @Override - public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException { - StructObjectInspector soi = (StructObjectInspector) objInspector; - List fields = soi.getAllStructFieldRefs(); - List list = soi.getStructFieldsDataAsList(obj); - if (fields.size() != numColumns) { - throw new SerDeException("Cannot serialize the object because there are " - + fields.size() + " fields but the table has " + numColumns - + " columns."); - } - - serializeStream.reset(); - // Get all data out. - for (int c = 0; c < numColumns; c++) { - //write the delimiter to the stream, which means we don't need output.format.string anymore - if (c > 0) { - serializeStream.write(fieldDelimited.getBytes(), 0, fieldDelimited.getBytes().length); - } - - Object field = list == null ? null : list.get(c); - ObjectInspector fieldOI = fields.get(c).getFieldObjectInspector(); - - try { - serializeNoEncode(serializeStream, field, fieldOI, serdeParams.getSeparators(), 1, - serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar(), - serdeParams.getNeedsEscape()); - } catch (IOException e) { - throw new SerDeException(e); - } - } - - serializeCache.set(serializeStream.getData(), 0, serializeStream.getLength()); - return serializeCache; - } - - // This is basically the same as LazySimpleSerDe.serialize. Except that we don't use - // Base64 to encode binary data because we're using printable string as delimiter. - // Consider such a row "strAQ==\1", str is a string, AQ== is the delimiter and \1 - // is the binary data. - private static void serializeNoEncode(ByteStream.Output out, Object obj, - ObjectInspector objInspector, byte[] separators, int level, - Text nullSequence, boolean escaped, byte escapeChar, boolean[] needsEscape) - throws IOException, SerDeException { - if (obj == null) { - out.write(nullSequence.getBytes(), 0, nullSequence.getLength()); - return; - } - - char separator; - List list; - switch (objInspector.getCategory()) { - case PRIMITIVE: - PrimitiveObjectInspector oi = (PrimitiveObjectInspector) objInspector; - if (oi.getPrimitiveCategory() == PrimitiveCategory.BINARY) { - BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(obj); - byte[] toWrite = new byte[bw.getLength()]; - System.arraycopy(bw.getBytes(), 0, toWrite, 0, bw.getLength()); - out.write(toWrite, 0, toWrite.length); - } else { - LazyUtils.writePrimitiveUTF8(out, obj, oi, escaped, escapeChar, needsEscape); - } - return; - case LIST: - separator = (char) separators[level]; - ListObjectInspector loi = (ListObjectInspector) objInspector; - list = loi.getList(obj); - ObjectInspector eoi = loi.getListElementObjectInspector(); - if (list == null) { - out.write(nullSequence.getBytes(), 0, nullSequence.getLength()); - } else { - for (int i = 0; i < list.size(); i++) { - if (i > 0) { - out.write(separator); - } - serializeNoEncode(out, list.get(i), eoi, separators, level + 1, nullSequence, - escaped, escapeChar, needsEscape); - } - } - return; - case MAP: - separator = (char) separators[level]; - char keyValueSeparator = (char) separators[level + 1]; - - MapObjectInspector moi = (MapObjectInspector) objInspector; - ObjectInspector koi = moi.getMapKeyObjectInspector(); - ObjectInspector voi = moi.getMapValueObjectInspector(); - Map map = moi.getMap(obj); - if (map == null) { - out.write(nullSequence.getBytes(), 0, nullSequence.getLength()); - } else { - boolean first = true; - for (Map.Entry entry : map.entrySet()) { - if (first) { - first = false; - } else { - out.write(separator); - } - serializeNoEncode(out, entry.getKey(), koi, separators, level + 2, - nullSequence, escaped, escapeChar, needsEscape); - out.write(keyValueSeparator); - serializeNoEncode(out, entry.getValue(), voi, separators, level + 2, - nullSequence, escaped, escapeChar, needsEscape); - } - } - return; - case STRUCT: - separator = (char) separators[level]; - StructObjectInspector soi = (StructObjectInspector) objInspector; - List fields = soi.getAllStructFieldRefs(); - list = soi.getStructFieldsDataAsList(obj); - if (list == null) { - out.write(nullSequence.getBytes(), 0, nullSequence.getLength()); - } else { - for (int i = 0; i < list.size(); i++) { - if (i > 0) { - out.write(separator); - } - serializeNoEncode(out, list.get(i), fields.get(i).getFieldObjectInspector(), - separators, level + 1, nullSequence, escaped, escapeChar, - needsEscape); - } - } - return; - } - throw new RuntimeException("Unknown category type: "+ objInspector.getCategory()); - } - - @Override - public SerDeStats getSerDeStats() { - // no support for statistics - return null; - } - -} diff --git hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java index 3bbab20..6cde5f3 100644 --- hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java +++ hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java @@ -51,7 +51,7 @@ private final ColumnMapping keyMapping; private final ColumnMapping timestampMapping; private final ColumnMapping[] columnMappings; - private final byte[] separators; // the separators array + private final byte[][] separators; // the separators array private final boolean escaped; // whether we need to escape the data when writing out private final byte escapeChar; // which char to use as the escape char, e.g. '\\' private final boolean[] needsEscape; // which chars need to be escaped. This array should have size @@ -239,7 +239,7 @@ private boolean serialize( (PrimitiveObjectInspector) objInspector, escaped, escapeChar, needsEscape); return true; case LIST: - char separator = (char) separators[level]; + byte[] separator = separators[level]; ListObjectInspector loi = (ListObjectInspector)objInspector; List list = loi.getList(obj); ObjectInspector eoi = loi.getListElementObjectInspector(); @@ -255,8 +255,8 @@ private boolean serialize( } return true; case MAP: - char sep = (char) separators[level]; - char keyValueSeparator = (char) separators[level+1]; + byte[] sep = separators[level]; + byte[] keyValueSeparator = separators[level+1]; MapObjectInspector moi = (MapObjectInspector) objInspector; ObjectInspector koi = moi.getMapKeyObjectInspector(); ObjectInspector voi = moi.getMapValueObjectInspector(); @@ -279,7 +279,7 @@ private boolean serialize( } return true; case STRUCT: - sep = (char)separators[level]; + sep = separators[level]; StructObjectInspector soi = (StructObjectInspector)objInspector; List fields = soi.getAllStructFieldRefs(); list = soi.getStructFieldsDataAsList(obj); diff --git hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestLazyHBaseObject.java hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestLazyHBaseObject.java index b2bdd19..e07f46b 100644 --- hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestLazyHBaseObject.java +++ hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestLazyHBaseObject.java @@ -55,6 +55,11 @@ * TestLazyHBaseObject is a test for the LazyHBaseXXX classes. */ public class TestLazyHBaseObject extends TestCase { + + private final byte[][] separator1 = {{(byte) 1}, {(byte) 2}}; + private final byte[][] separator2 = {{(byte)'#'},{(byte)'\t'}}; + private final byte[][] separator3 = {{' '}, {':'}, {'='}}; + /** * Test the LazyMap class with Integer-to-String. * @throws SerDeException @@ -64,7 +69,7 @@ public void testLazyHBaseCellMap1() throws SerDeException { Text nullSequence = new Text("\\N"); ObjectInspector oi = LazyFactory.createLazyObjectInspector( TypeInfoUtils.getTypeInfosFromTypeString("map").get(0), - new byte[]{(byte)1, (byte)2}, 0, nullSequence, false, (byte)0); + separator1, 0, nullSequence, false, (byte)0); LazyHBaseCellMap b = new LazyHBaseCellMap((LazyMapObjectInspector) oi); @@ -126,7 +131,7 @@ public void testLazyHBaseCellMap2() throws SerDeException { Text nullSequence = new Text("\\N"); ObjectInspector oi = LazyFactory.createLazyObjectInspector( TypeInfoUtils.getTypeInfosFromTypeString("map").get(0), - new byte[]{(byte)'#', (byte)'\t'}, 0, nullSequence, false, (byte)0); + separator2, 0, nullSequence, false, (byte)0); LazyHBaseCellMap b = new LazyHBaseCellMap((LazyMapObjectInspector) oi); @@ -189,7 +194,7 @@ public void testLazyHBaseCellMap3() throws SerDeException { Text nullSequence = new Text("\\N"); TypeInfo mapBinaryIntKeyValue = TypeInfoUtils.getTypeInfoFromTypeString("map"); ObjectInspector oi = LazyFactory.createLazyObjectInspector( - mapBinaryIntKeyValue, new byte [] {(byte)1, (byte) 2}, 0, nullSequence, false, (byte) 0); + mapBinaryIntKeyValue, separator1, 0, nullSequence, false, (byte) 0); LazyHBaseCellMap hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); List kvs = new ArrayList(); @@ -232,7 +237,7 @@ public void testLazyHBaseCellMap3() throws SerDeException { TypeInfo mapBinaryByteKeyValue = TypeInfoUtils.getTypeInfoFromTypeString("map"); oi = LazyFactory.createLazyObjectInspector( - mapBinaryByteKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, (byte) 0); + mapBinaryByteKeyValue, separator1, 0, nullSequence, false, (byte) 0); hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); byte [] cfByte = "cf-byte".getBytes(); kvs.clear(); @@ -270,7 +275,7 @@ public void testLazyHBaseCellMap3() throws SerDeException { TypeInfo mapBinaryShortKeyValue = TypeInfoUtils.getTypeInfoFromTypeString("map"); oi = LazyFactory.createLazyObjectInspector( - mapBinaryShortKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, (byte) 0); + mapBinaryShortKeyValue, separator1, 0, nullSequence, false, (byte) 0); hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); byte [] cfShort = "cf-short".getBytes(); kvs.clear(); @@ -308,7 +313,7 @@ public void testLazyHBaseCellMap3() throws SerDeException { TypeInfo mapBinaryLongKeyValue = TypeInfoUtils.getTypeInfoFromTypeString("map"); oi = LazyFactory.createLazyObjectInspector( - mapBinaryLongKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, (byte) 0); + mapBinaryLongKeyValue, separator1, 0, nullSequence, false, (byte) 0); hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); byte [] cfLong = "cf-long".getBytes(); kvs.clear(); @@ -346,7 +351,7 @@ public void testLazyHBaseCellMap3() throws SerDeException { TypeInfo mapBinaryFloatKeyValue = TypeInfoUtils.getTypeInfoFromTypeString("map"); oi = LazyFactory.createLazyObjectInspector( - mapBinaryFloatKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, + mapBinaryFloatKeyValue, separator1, 0, nullSequence, false, (byte) 0); hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); byte [] cfFloat = "cf-float".getBytes(); @@ -386,7 +391,7 @@ public void testLazyHBaseCellMap3() throws SerDeException { TypeInfo mapBinaryDoubleKeyValue = TypeInfoUtils.getTypeInfoFromTypeString("map"); oi = LazyFactory.createLazyObjectInspector( - mapBinaryDoubleKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, + mapBinaryDoubleKeyValue, separator1, 0, nullSequence, false, (byte) 0); hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); byte [] cfDouble = "cf-double".getBytes(); @@ -425,7 +430,7 @@ public void testLazyHBaseCellMap3() throws SerDeException { TypeInfo mapBinaryBooleanKeyValue = TypeInfoUtils.getTypeInfoFromTypeString("map"); oi = LazyFactory.createLazyObjectInspector( - mapBinaryBooleanKeyValue, new byte [] {(byte) 1, (byte) 2}, 0, nullSequence, false, + mapBinaryBooleanKeyValue, separator1, 0, nullSequence, false, (byte) 0); hbaseCellMap = new LazyHBaseCellMap((LazyMapObjectInspector) oi); byte [] cfBoolean = "cf-boolean".getBytes(); @@ -481,7 +486,7 @@ public void testLazyHBaseRow1() throws SerDeException { } ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, - fieldTypeInfos, new byte[] {' ', ':', '='}, + fieldTypeInfos, separator3, nullSequence, false, false, (byte)0); LazyHBaseRow o = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings); @@ -607,7 +612,7 @@ public void testLazyHBaseRow2() throws SerDeException { ObjectInspector oi = LazyFactory.createLazyStructInspector( fieldNames, fieldTypeInfos, - new byte[] {' ', ':', '='}, + separator3, nullSequence, false, false, (byte) 0); LazyHBaseRow o = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings); @@ -731,7 +736,7 @@ public void testLazyHBaseRow3() throws SerDeException { ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, fieldTypeInfos, - new byte [] {' ', ':', '='}, nullSequence, false, false, (byte) 0); + separator3, nullSequence, false, false, (byte) 0); LazyHBaseRow o = new LazyHBaseRow((LazySimpleStructObjectInspector) oi, columnMappings); diff --git hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/DelimitedInputWriter.java hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/DelimitedInputWriter.java index 36834b1..dd15510 100644 --- hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/DelimitedInputWriter.java +++ hcatalog/streaming/src/java/org/apache/hive/hcatalog/streaming/DelimitedInputWriter.java @@ -47,7 +47,7 @@ public class DelimitedInputWriter extends AbstractRecordWriter { private final boolean reorderingNeeded; private String delimiter; - private char serdeSeparator; + private final String serdeSeparator; private int[] fieldToColMapping; private final ArrayList tableColumns; private AbstractSerDe serde = null; @@ -89,9 +89,14 @@ public DelimitedInputWriter(String[] colNamesForFields, String delimiter, throws ClassNotFoundException, ConnectionError, SerializationError, InvalidColumn, StreamingException { this(colNamesForFields, delimiter, endPoint, conf, - (char) LazySimpleSerDe.DefaultSeparators[0]); + new String(LazySimpleSerDe.DefaultSeparators[0])); } + public DelimitedInputWriter(String[] colNamesForFields, String delimiter, + HiveEndPoint endPoint, HiveConf conf, char serdeSeparator) + throws ClassNotFoundException, StreamingException { + this(colNamesForFields, delimiter, endPoint, conf, String.valueOf(serdeSeparator)); + } /** * Constructor. Allows overriding separator of the LazySimpleSerde * @param colNamesForFields Column name assignment for input fields @@ -108,7 +113,7 @@ public DelimitedInputWriter(String[] colNamesForFields, String delimiter, * @throws InvalidColumn any element in colNamesForFields refers to a non existing column */ public DelimitedInputWriter(String[] colNamesForFields, String delimiter, - HiveEndPoint endPoint, HiveConf conf, char serdeSeparator) + HiveEndPoint endPoint, HiveConf conf, String serdeSeparator) throws ClassNotFoundException, ConnectionError, SerializationError, InvalidColumn, StreamingException { super(endPoint, conf); @@ -118,11 +123,10 @@ public DelimitedInputWriter(String[] colNamesForFields, String delimiter, this.fieldToColMapping = getFieldReordering(colNamesForFields, getTableColumns()); this.reorderingNeeded = isReorderingNeeded(delimiter, getTableColumns()); LOG.debug("Field reordering needed = " + this.reorderingNeeded + ", for endpoint " + endPoint); - this.serdeSeparator = serdeSeparator; } private boolean isReorderingNeeded(String delimiter, ArrayList tableColumns) { - return !( delimiter.equals(String.valueOf(getSerdeSeparator())) + return !( delimiter.equals(getSerdeSeparator()) && areFieldsInColOrder(fieldToColMapping) && tableColumns.size()>=fieldToColMapping.length ); } @@ -184,7 +188,7 @@ private static boolean areFieldsInColOrder(int[] fieldToColMapping) { // handles nulls in items[] // TODO: perhaps can be made more efficient by creating a byte[] directly - private static byte[] join(String[] items, char separator) { + private static byte[] join(String[] items, String separator) { StringBuffer buff = new StringBuffer(100); if(items.length == 0) return "".getBytes(); @@ -247,7 +251,7 @@ protected LazySimpleSerDe createSerde(Table tbl, HiveConf conf) throws SerializationError { try { Properties tableProps = MetaStoreUtils.getTableMetadata(tbl); - tableProps.setProperty("field.delim", String.valueOf(serdeSeparator)); + tableProps.setProperty("field.delim", getSerdeSeparator()); LazySimpleSerDe serde = new LazySimpleSerDe(); SerDeUtils.initializeSerDe(serde, conf, tableProps, null); return serde; @@ -265,7 +269,7 @@ protected LazySimpleSerDe createSerde(Table tbl, HiveConf conf) return colNames; } - public char getSerdeSeparator() { + public String getSerdeSeparator() { return serdeSeparator; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java index 332ced8..5d4d1ee 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java @@ -143,7 +143,7 @@ private void add(Text line) throws HiveException { int firstEnd = 0; int i = 0; for (int index = 0; index < bytes.length; index++) { - if (bytes[index] == LazySimpleSerDe.DefaultSeparators[0]) { + if (bytes[index] == LazySimpleSerDe.DefaultSeparators[0][0]) { i++; firstEnd = index; } @@ -168,7 +168,7 @@ private void add(Text line) throws HiveException { int currentStart = firstEnd + 1; int currentEnd = firstEnd + 1; for (; currentEnd < bytes.length; currentEnd++) { - if (bytes[currentEnd] == LazySimpleSerDe.DefaultSeparators[1]) { + if (bytes[currentEnd] == LazySimpleSerDe.DefaultSeparators[1][0]) { String one_offset = new String(bytes, currentStart, currentEnd - currentStart); Long offset = Long.parseLong(one_offset); diff --git serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java index 2fb1c28..f72d9d4 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java @@ -86,7 +86,7 @@ * */ public AvroLazyObjectInspector(List structFieldNames, List structFieldObjectInspectors, List structFieldComments, - byte separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, + byte[] separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, byte escapeChar) { super(structFieldNames, structFieldObjectInspectors, structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, escapeChar); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java index e3968a9..12d64b3 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java @@ -217,7 +217,7 @@ * @throws SerDeException */ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, - byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, + byte[][] separator, int separatorIndex, Text nullSequence, boolean escaped, byte escapeChar, ObjectInspectorOptions option) throws SerDeException { return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, escaped, escapeChar, false, option); @@ -241,7 +241,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, * @throws SerDeException */ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, - byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, + byte[][] separator, int separatorIndex, Text nullSequence, boolean escaped, byte escapeChar) throws SerDeException { return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, escaped, escapeChar, false, ObjectInspectorOptions.JAVA); @@ -262,7 +262,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, * @throws SerDeException */ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, - byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, + byte[][] separator, int separatorIndex, Text nullSequence, boolean escaped, byte escapeChar, boolean extendedBooleanLiteral) throws SerDeException { return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, escaped, escapeChar, extendedBooleanLiteral, ObjectInspectorOptions.JAVA); @@ -283,7 +283,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, * @throws SerDeException */ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, - byte[] separator, int separatorIndex, Text nullSequence, boolean escaped, + byte[][] separator, int separatorIndex, Text nullSequence, boolean escaped, byte escapeChar, boolean extendedBooleanLiteral, ObjectInspectorOptions option) throws SerDeException { ObjectInspector.Category c = typeInfo.getCategory(); switch (c) { @@ -297,14 +297,15 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), createLazyObjectInspector( ((MapTypeInfo) typeInfo).getMapValueTypeInfo(), separator, separatorIndex + 2, nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), - LazyUtils.getSeparator(separator, separatorIndex), - LazyUtils.getSeparator(separator, separatorIndex+1), + LazyUtils.getSingleSeparator(separator, separatorIndex), + LazyUtils.getSingleSeparator(separator, separatorIndex+1), nullSequence, escaped, escapeChar); case LIST: return LazyObjectInspectorFactory.getLazySimpleListObjectInspector( createLazyObjectInspector(((ListTypeInfo) typeInfo) .getListElementTypeInfo(), separator, separatorIndex + 1, - nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), LazyUtils.getSeparator(separator, separatorIndex), + nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), + LazyUtils.getSingleSeparator(separator, separatorIndex), nullSequence, escaped, escapeChar); case STRUCT: StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo; @@ -332,7 +333,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, escapeChar, extendedBooleanLiteral, option)); } return LazyObjectInspectorFactory.getLazyUnionObjectInspector(lazyOIs, - LazyUtils.getSeparator(separator, separatorIndex), + LazyUtils.getSingleSeparator(separator, separatorIndex), nullSequence, escaped, escapeChar); } @@ -351,7 +352,7 @@ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo, * boolean, byte) */ public static ObjectInspector createLazyStructInspector( - List columnNames, List typeInfos, byte[] separators, + List columnNames, List typeInfos, byte[][] separators, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, byte escapeChar) throws SerDeException { return createLazyStructInspector(columnNames, typeInfos, separators, @@ -371,7 +372,7 @@ public static ObjectInspector createLazyStructInspector( * boolean, byte) */ public static ObjectInspector createLazyStructInspector( - List columnNames, List typeInfos, byte[] separators, + List columnNames, List typeInfos, byte[][] separators, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, byte escapeChar, boolean extendedBooleanLiteral) throws SerDeException { ArrayList columnObjectInspectors = new ArrayList( @@ -395,7 +396,7 @@ public static ObjectInspector createLazyStructInspector( * boolean, byte) */ public static ObjectInspector createColumnarStructInspector( - List columnNames, List columnTypes, byte[] separators, + List columnNames, List columnTypes, byte[][] separators, Text nullSequence, boolean escaped, byte escapeChar) throws SerDeException { ArrayList columnObjectInspectors = new ArrayList( columnTypes.size()); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java index 95e30db..a83d899 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.BinaryComparable; +import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; @@ -79,7 +80,7 @@ public static final String SERIALIZATION_EXTEND_NESTING_LEVELS = "hive.serialization.extend.nesting.levels"; - public static final byte[] DefaultSeparators = {(byte) 1, (byte) 2, (byte) 3}; + public static final byte[][] DefaultSeparators = {{(byte) 1}, {(byte) 2}, {(byte) 3}}; private ObjectInspector cachedObjectInspector; @@ -112,23 +113,39 @@ public LazySimpleSerDe() throws SerDeException { * If the altValue does not represent a number, return the * defaultVal. */ - public static byte getByte(String altValue, byte defaultVal) { + private static byte[] getDelimiter(String altValue, byte[] defaultVal) { if (altValue != null && altValue.length() > 0) { - try { - return Byte.valueOf(altValue).byteValue(); - } catch (NumberFormatException e) { - return (byte) altValue.charAt(0); + String[] split = altValue.split(","); + byte[] result = new byte[split.length]; + for (int i = 0; i < split.length; i++) { + try { + result[i] = Byte.valueOf(split[i]).byteValue(); + } catch (NumberFormatException e) { + result[i] = (byte) split[i].charAt(0); + } } } return defaultVal; } + private static byte getSingleDelimiter(String altValue, byte defaultVal) { + byte[] delimiter = getDelimiter(altValue, null); + if (delimiter == null) { + return defaultVal; + } + if (delimiter.length > 1) { + throw new RuntimeException("Expected single byte separator but " + + new BytesWritable(delimiter) + " is acquired"); + } + return delimiter[0]; + } + /** * SerDeParameters. * */ public static class SerDeParameters { - byte[] separators = DefaultSeparators; + byte[][] separators = DefaultSeparators; String nullString; Text nullSequence; TypeInfo rowTypeInfo; @@ -150,7 +167,7 @@ public static byte getByte(String altValue, byte defaultVal) { return columnNames; } - public byte[] getSeparators() { + public byte[][] getSeparators() { return separators; } @@ -234,26 +251,26 @@ public static SerDeParameters initSerdeParams(Configuration job, // To increase this level further, we need to stop relying // on single control chars delimiters - serdeParams.separators = new byte[8]; - serdeParams.separators[0] = getByte(tbl.getProperty(serdeConstants.FIELD_DELIM, + serdeParams.separators = new byte[8][]; + serdeParams.separators[0] = getDelimiter(tbl.getProperty(serdeConstants.FIELD_DELIM, tbl.getProperty(serdeConstants.SERIALIZATION_FORMAT)), DefaultSeparators[0]); - serdeParams.separators[1] = getByte(tbl + serdeParams.separators[1] = getDelimiter(tbl .getProperty(serdeConstants.COLLECTION_DELIM), DefaultSeparators[1]); - serdeParams.separators[2] = getByte( + serdeParams.separators[2] = getDelimiter( tbl.getProperty(serdeConstants.MAPKEY_DELIM), DefaultSeparators[2]); String extendedNesting = tbl.getProperty(SERIALIZATION_EXTEND_NESTING_LEVELS); if(extendedNesting == null || !extendedNesting.equalsIgnoreCase("true")){ //use the default smaller set of separators for backward compatibility for (int i = 3; i < serdeParams.separators.length; i++) { - serdeParams.separators[i] = (byte) (i + 1); + serdeParams.separators[i] = new byte[]{(byte) (i + 1)}; } } else{ //If extended nesting is enabled, set the extended set of separator chars final int MAX_CTRL_CHARS = 29; - byte[] extendedSeparators = new byte[MAX_CTRL_CHARS]; + byte[][] extendedSeparators = new byte[MAX_CTRL_CHARS][]; int extendedSeparatorsIdx = 0; //get the first 3 separators that have already been set (defaults to 1,2,3) @@ -283,7 +300,7 @@ public static SerDeParameters initSerdeParams(Configuration job, case 27: continue; } - extendedSeparators[extendedSeparatorsIdx++] = asciival; + extendedSeparators[extendedSeparatorsIdx++] = new byte[] {asciival}; } serdeParams.separators = @@ -309,7 +326,7 @@ public static SerDeParameters initSerdeParams(Configuration job, String escapeProperty = tbl.getProperty(serdeConstants.ESCAPE_CHAR); serdeParams.escaped = (escapeProperty != null); if (serdeParams.escaped) { - serdeParams.escapeChar = getByte(escapeProperty, (byte) '\\'); + serdeParams.escapeChar = getSingleDelimiter(escapeProperty, (byte) '\\'); } if (serdeParams.escaped) { serdeParams.needsEscape = new boolean[128]; @@ -318,7 +335,7 @@ public static SerDeParameters initSerdeParams(Configuration job, } serdeParams.needsEscape[serdeParams.escapeChar] = true; for (int i = 0; i < serdeParams.separators.length; i++) { - serdeParams.needsEscape[serdeParams.separators[i]] = true; + serdeParams.needsEscape[serdeParams.separators[i][0]] = true; } } @@ -476,7 +493,7 @@ protected void serializeField(ByteStream.Output out, Object obj, ObjectInspector * @throws SerDeException */ public static void serialize(ByteStream.Output out, Object obj, - ObjectInspector objInspector, byte[] separators, int level, + ObjectInspector objInspector, byte[][] separators, int level, Text nullSequence, boolean escaped, byte escapeChar, boolean[] needsEscape) throws IOException, SerDeException { @@ -485,7 +502,7 @@ public static void serialize(ByteStream.Output out, Object obj, return; } - char separator; + byte[] separator; List list; switch (objInspector.getCategory()) { case PRIMITIVE: @@ -494,7 +511,7 @@ public static void serialize(ByteStream.Output out, Object obj, needsEscape); return; case LIST: - separator = (char) LazyUtils.getSeparator(separators, level); + separator = LazyUtils.getSeparator(separators, level); ListObjectInspector loi = (ListObjectInspector) objInspector; list = loi.getList(obj); ObjectInspector eoi = loi.getListElementObjectInspector(); @@ -511,9 +528,8 @@ public static void serialize(ByteStream.Output out, Object obj, } return; case MAP: - separator = (char) LazyUtils.getSeparator(separators, level); - char keyValueSeparator = - (char) LazyUtils.getSeparator(separators, level + 1); + separator = LazyUtils.getSeparator(separators, level); + byte[] keyValueSeparator = LazyUtils.getSeparator(separators, level + 1); MapObjectInspector moi = (MapObjectInspector) objInspector; ObjectInspector koi = moi.getMapKeyObjectInspector(); @@ -538,7 +554,7 @@ public static void serialize(ByteStream.Output out, Object obj, } return; case STRUCT: - separator = (char) LazyUtils.getSeparator(separators, level); + separator = LazyUtils.getSeparator(separators, level); StructObjectInspector soi = (StructObjectInspector) objInspector; List fields = soi.getAllStructFieldRefs(); list = soi.getStructFieldsDataAsList(obj); @@ -556,7 +572,7 @@ public static void serialize(ByteStream.Output out, Object obj, } return; case UNION: - separator = (char) LazyUtils.getSeparator(separators, level); + separator = LazyUtils.getSeparator(separators, level); UnionObjectInspector uoi = (UnionObjectInspector) objInspector; List ois = uoi.getObjectInspectors(); if (ois == null) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java index 9a246af..0bd9eaa 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java @@ -21,7 +21,6 @@ import java.util.Arrays; import java.util.List; -import com.google.common.primitives.Bytes; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.serde2.SerDeException; @@ -42,6 +41,9 @@ private static Log LOG = LogFactory.getLog(LazyStruct.class.getName()); + private static final byte SPACE_CHAR = (byte)' '; + private static final byte TAB_CHAR = (byte)'\t'; + /** * Whether the data is already parsed or not. */ @@ -57,8 +59,11 @@ * Note that startPosition[arrayLength] = begin + length + 1; that makes sure * we can use the same formula to compute the length of each element of the * array. + * + * note updated to use separate arrays for start and end position to allow for trim */ - int[] startPosition; + int[] dataStartPosition; + int[] dataEndPosition; /** * The fields of the struct. @@ -96,7 +101,7 @@ public void init(ByteArrayRef bytes, int start, int length) { */ private void parse() { - byte separator = oi.getSeparator(); + byte[] separator = oi.getSeparator(); boolean lastColumnTakesRest = oi.getLastColumnTakesRest(); boolean isEscaped = oi.isEscaped(); byte escapeChar = oi.getEscapeChar(); @@ -105,48 +110,39 @@ private void parse() { initLazyFields(oi.getAllStructFieldRefs()); } - int structByteEnd = start + length; - int fieldId = 0; - int fieldByteBegin = start; - int fieldByteEnd = start; + final int structByteEnd = start + length; + byte[] bytes = this.bytes.getData(); // Go through all bytes in the byte[] - while (fieldByteEnd <= structByteEnd) { - if (fieldByteEnd == structByteEnd || bytes[fieldByteEnd] == separator) { - // Reached the end of a field? - if (lastColumnTakesRest && fieldId == fields.length - 1) { - fieldByteEnd = structByteEnd; - } - startPosition[fieldId] = fieldByteBegin; - fieldId++; - if (fieldId == fields.length || fieldByteEnd == structByteEnd) { - // All fields have been parsed, or bytes have been parsed. - // We need to set the startPosition of fields.length to ensure we - // can use the same formula to calculate the length of each field. - // For missing fields, their starting positions will all be the same, - // which will make their lengths to be -1 and uncheckedGetField will - // return these fields as NULLs. - for (int i = fieldId; i <= fields.length; i++) { - startPosition[i] = fieldByteEnd + 1; - } + + Arrays.fill(dataStartPosition, -1); + Arrays.fill(dataEndPosition, -1); + dataStartPosition[0] = start; + + int fieldId = 0; + int index = start; + while (index <= structByteEnd) { + if (index == structByteEnd || isDelimiter(bytes, separator, index)) { + if (fieldId == fields.length - 1) { + dataEndPosition[fieldId] = lastColumnTakesRest ? structByteEnd : index; break; } - fieldByteBegin = fieldByteEnd + 1; - fieldByteEnd++; + dataEndPosition[fieldId] = index; + dataStartPosition[++fieldId] = Math.min(index + separator.length, structByteEnd); + index += separator.length; } else { - if (isEscaped && bytes[fieldByteEnd] == escapeChar - && fieldByteEnd + 1 < structByteEnd) { + if (isEscaped && bytes[index] == escapeChar) { // ignore the char after escape_char - fieldByteEnd += 2; + index += 2; } else { - fieldByteEnd++; + index++; } } } // Extra bytes at the end? - if (!extraFieldWarned && fieldByteEnd < structByteEnd) { + if (!extraFieldWarned && index < structByteEnd) { extraFieldWarned = true; LOG.warn("Extra bytes detected at the end of the row! Ignoring similar " + "problems."); @@ -175,9 +171,11 @@ protected final void initLazyFields(List fieldRefs) { fieldInited = new boolean[fields.length]; // Extra element to make sure we have the same formula to compute the // length of each element of the array. - startPosition = new int[fields.length + 1]; + dataStartPosition = new int[fields.length]; + dataEndPosition = new int[fields.length]; } + protected LazyObjectBase createLazyField(int fieldID, StructField fieldRef) throws SerDeException { return LazyFactory.createLazyObject(fieldRef.getFieldObjectInspector()); } @@ -218,12 +216,12 @@ private Object uncheckedGetField(int fieldID) { } fieldInited[fieldID] = true; - int fieldByteBegin = startPosition[fieldID]; - int fieldLength = startPosition[fieldID + 1] - startPosition[fieldID] - 1; - if (isNull(oi.getNullSequence(), bytes, fieldByteBegin, fieldLength)) { + int fieldStart = dataStartPosition[fieldID]; + int fieldLength = dataEndPosition[fieldID] - dataStartPosition[fieldID] - 1; + if (isNull(oi.getNullSequence(), bytes, fieldStart, fieldLength)) { fields[fieldID].setNull(); } else { - fields[fieldID].init(bytes, fieldByteBegin, fieldLength); + fields[fieldID].init(bytes, fieldStart, fieldLength); } return fields[fieldID].getObject(); } @@ -278,59 +276,22 @@ public long getRawDataSerializedSize() { return serializedSize; } - // parse the struct using multi-char delimiter - public void parseMultiDelimit(byte[] rawRow, byte[] fieldDelimit) { - if (rawRow == null || fieldDelimit == null) { - return; - } - if (fields == null) { - List fieldRefs = oi.getAllStructFieldRefs(); - fields = new LazyObject[fieldRefs.size()]; - for (int i = 0; i < fields.length; i++) { - fields[i] = LazyFactory.createLazyObject(fieldRefs.get(i).getFieldObjectInspector()); - } - fieldInited = new boolean[fields.length]; - startPosition = new int[fields.length + 1]; - } - // the indexes of the delimiters - int[] delimitIndexes = findIndexes(rawRow, fieldDelimit); - int diff = fieldDelimit.length - 1; - // first field always starts from 0, even when missing - startPosition[0] = 0; - for (int i = 1; i < fields.length; i++) { - if (delimitIndexes[i - 1] != -1) { - int start = delimitIndexes[i - 1] + fieldDelimit.length; - startPosition[i] = start - i * diff; - } else { - startPosition[i] = length + 1; + private int findDelimiter(byte[] data, byte[] delimiter, int start) { + for (int i = start; i < delimiter.length; i++) { + if (isDelimiter(data, delimiter, i)) { + return i; } } - startPosition[fields.length] = length + 1; - Arrays.fill(fieldInited, false); - parsed = true; + return -1; } - // find all the indexes of the sub byte[] - private int[] findIndexes(byte[] array, byte[] target) { - if (fields.length <= 1) { - return new int[0]; - } - int[] indexes = new int[fields.length - 1]; - Arrays.fill(indexes, -1); - indexes[0] = Bytes.indexOf(array, target); - if (indexes[0] == -1) { - return indexes; - } - int indexInNewArray = indexes[0]; - for (int i = 1; i < indexes.length; i++) { - array = Arrays.copyOfRange(array, indexInNewArray + target.length, array.length); - indexInNewArray = Bytes.indexOf(array, target); - if (indexInNewArray == -1) { - break; + private boolean isDelimiter(byte[] data, byte[] delimiter, int start) { + for (int i = 0; i < delimiter.length; i++) { + if (data[start + i] != delimiter[i]) { + return false; } - indexes[i] = indexInNewArray + indexes[i - 1] + target.length; } - return indexes; + return true; } /** diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java index 3943508..e33257f 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java @@ -397,7 +397,7 @@ public static void extractColumnInfo(Properties tbl, SerDeParameters serdeParams * @return separator at given level * @throws SerDeException */ - static byte getSeparator(byte[] separators, int level) throws SerDeException { + static byte[] getSeparator(byte[][] separators, int level) throws SerDeException { try{ return separators[level]; }catch(ArrayIndexOutOfBoundsException e){ @@ -412,6 +412,15 @@ static byte getSeparator(byte[] separators, int level) throws SerDeException { } } + static byte getSingleSeparator(byte[][] separators, int level) throws SerDeException { + byte[] separator = getSeparator(separators, level); + if (separator.length > 1) { + throw new SerDeException("Expected single byte separator but " + + new BytesWritable(separator) + " is acquired"); + } + return separator[0]; + } + public static void copyAndEscapeStringDataToText(byte[] inputBytes, int start, int length, byte escapeChar, Text data) { diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java index 1abd8a5..413cc53 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java @@ -45,7 +45,7 @@ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( List structFieldNames, - List structFieldObjectInspectors, byte separator, + List structFieldObjectInspectors, byte[] separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, byte escapeChar) { return getLazySimpleStructObjectInspector(structFieldNames, @@ -55,7 +55,7 @@ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( List structFieldNames, - List structFieldObjectInspectors, byte separator, + List structFieldObjectInspectors, byte[] separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, byte escapeChar, ObjectInspectorOptions option) { return getLazySimpleStructObjectInspector(structFieldNames, @@ -66,7 +66,7 @@ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( List structFieldNames, List structFieldObjectInspectors, List structFieldComments, - byte separator, Text nullSequence, boolean lastColumnTakesRest, + byte[] separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, byte escapeChar) { return getLazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors, structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, escapeChar, @@ -76,12 +76,12 @@ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector( List structFieldNames, List structFieldObjectInspectors, List structFieldComments, - byte separator, Text nullSequence, boolean lastColumnTakesRest, + byte[] separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped,byte escapeChar, ObjectInspectorOptions option) { ArrayList signature = new ArrayList(); signature.add(structFieldNames); signature.add(structFieldObjectInspectors); - signature.add(Byte.valueOf(separator)); + signature.add(new String(separator)); signature.add(nullSequence.toString()); signature.add(Boolean.valueOf(lastColumnTakesRest)); signature.add(Boolean.valueOf(escaped)); diff --git serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java index 9611e9f..d193ff1 100644 --- serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java +++ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java @@ -41,7 +41,7 @@ */ public class LazySimpleStructObjectInspector extends BaseStructObjectInspector { - private byte separator; + private byte[] separator; private Text nullSequence; private boolean lastColumnTakesRest; private boolean escaped; @@ -52,7 +52,7 @@ protected LazySimpleStructObjectInspector() { } protected LazySimpleStructObjectInspector( - List fields, byte separator, Text nullSequence) { + List fields, byte[] separator, Text nullSequence) { init(fields); this.separator = separator; this.nullSequence = nullSequence; @@ -63,7 +63,7 @@ protected LazySimpleStructObjectInspector( */ @Deprecated protected LazySimpleStructObjectInspector(List structFieldNames, - List structFieldObjectInspectors, byte separator, + List structFieldObjectInspectors, byte[] separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, byte escapeChar) { init(structFieldNames, structFieldObjectInspectors, null, separator, @@ -72,7 +72,7 @@ protected LazySimpleStructObjectInspector(List structFieldNames, public LazySimpleStructObjectInspector(List structFieldNames, List structFieldObjectInspectors, - List structFieldComments, byte separator, Text nullSequence, + List structFieldComments, byte[] separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, byte escapeChar) { init(structFieldNames, structFieldObjectInspectors, structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, escapeChar); @@ -80,7 +80,7 @@ public LazySimpleStructObjectInspector(List structFieldNames, protected void init(List structFieldNames, List structFieldObjectInspectors, - List structFieldComments, byte separator, + List structFieldComments, byte[] separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped, byte escapeChar) { init(structFieldNames, structFieldObjectInspectors, structFieldComments); @@ -137,7 +137,7 @@ public Object getStructFieldData(Object data, StructField fieldRef) { } // For LazyStruct - public byte getSeparator() { + public byte[] getSeparator() { return separator; } diff --git serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java index 48f3b02..38a081a 100644 --- serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java +++ serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazyArrayMapStruct.java @@ -31,9 +31,6 @@ import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.io.ByteWritable; -import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryFactory; -import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap; -import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; @@ -61,7 +58,7 @@ public void testLazyArray() throws Throwable { Text nullSequence = new Text("\\N"); ObjectInspector oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils .getTypeInfosFromTypeString("array").get(0), - new byte[] {(byte) 1}, 0, nullSequence, false, (byte) 0); + new byte[][] {{(byte) 1}}, 0, nullSequence, false, (byte) 0); LazyArray b = (LazyArray) LazyFactory.createLazyObject(oi); byte[] data = new byte[] {'-', '1', 1, '\\', 'N', 1, '8'}; TestLazyPrimitive.initLazyObject(b, data, 0, data.length); @@ -83,7 +80,7 @@ public void testLazyArray() throws Throwable { // Array of String oi = LazyFactory.createLazyObjectInspector(TypeInfoUtils .getTypeInfosFromTypeString("array").get(0), - new byte[] {(byte) '\t'}, 0, nullSequence, false, (byte) 0); + new byte[][] {{(byte) '\t'}}, 0, nullSequence, false, (byte) 0); b = (LazyArray) LazyFactory.createLazyObject(oi); data = new byte[] {'a', 'b', '\t', 'c', '\t', '\\', 'N', '\t', '\t', 'd'}; // Note: the first and last element of the byte[] are NOT used @@ -141,7 +138,7 @@ public void testLazyMap() throws Throwable { ObjectInspector oi = LazyFactory .createLazyObjectInspector(TypeInfoUtils .getTypeInfosFromTypeString("map").get(0), - new byte[] {(byte) 1, (byte) 2}, 0, nullSequence, false, + new byte[][] {{(byte) 1}, {(byte) 2}}, 0, nullSequence, false, (byte) 0); LazyMap b = (LazyMap) LazyFactory.createLazyObject(oi); byte[] data = new byte[] {'2', 2, 'd', 'e', 'f', 1, '-', '1', 2, '\\', @@ -167,7 +164,7 @@ public void testLazyMap() throws Throwable { Text nullSequence = new Text("\\N"); ObjectInspector oi = LazyFactory.createLazyObjectInspector( TypeInfoUtils.getTypeInfosFromTypeString("map").get( - 0), new byte[] {(byte) '#', (byte) '\t'}, 0, nullSequence, + 0), new byte[][] {{(byte) '#'}, {(byte) '\t'}}, 0, nullSequence, false, (byte) 0); LazyMap b = (LazyMap) LazyFactory.createLazyObject(oi); byte[] data = new byte[] {'2', '\t', 'd', '\t', 'f', '#', '2', '\t', @@ -206,7 +203,7 @@ public void testLazyMapWithBadEntries() throws Throwable { Text nullSequence = new Text(""); ObjectInspector oi = LazyFactory.createLazyObjectInspector( TypeInfoUtils.getTypeInfosFromTypeString("map").get( - 0), new byte[] {'\2', '\3'}, 0, nullSequence, + 0), new byte[][] {{'\2'}, {'\3'}}, 0, nullSequence, false, (byte) 0); LazyMap b = (LazyMap) LazyFactory.createLazyObject(oi); @@ -237,7 +234,7 @@ public void testLazyMapWithBadEntries() throws Throwable { Text nullSequence = new Text(""); ObjectInspector oi = LazyFactory.createLazyObjectInspector( TypeInfoUtils.getTypeInfosFromTypeString("map").get( - 0), new byte[] {'\2', '\3'}, 0, nullSequence, + 0), new byte[][] {{'\2'}, {'\3'}}, 0, nullSequence, false, (byte) 0); LazyMap b = (LazyMap) LazyFactory.createLazyObject(oi); @@ -269,7 +266,7 @@ public void testLazyMapWithBadEntries() throws Throwable { Text nullSequence = new Text(""); ObjectInspector oi = LazyFactory.createLazyObjectInspector( TypeInfoUtils.getTypeInfosFromTypeString("map").get( - 0), new byte[] {'\2', '\3'}, 0, nullSequence, + 0), new byte[][] {{'\2'}, {'\3'}}, 0, nullSequence, false, (byte) 0); LazyMap b = (LazyMap) LazyFactory.createLazyObject(oi); @@ -312,7 +309,7 @@ public void testLazyMapWithDuplicateKeys() throws Throwable { ObjectInspector oi = LazyFactory .createLazyObjectInspector(TypeInfoUtils .getTypeInfosFromTypeString("map").get(0), - new byte[] {(byte) 1, (byte) 2}, 0, nullSequence, false, + new byte[][] {{(byte) 1}, {(byte) 2}}, 0, nullSequence, false, (byte) 0); LazyMap b = (LazyMap) LazyFactory.createLazyObject(oi); byte[] data = new byte[] {'2', 2, 'd', 'e', 'f', 1, '-', '1', 2, '\\', @@ -338,7 +335,7 @@ public void testLazyMapWithDuplicateKeys() throws Throwable { Text nullSequence = new Text("\\N"); ObjectInspector oi = LazyFactory.createLazyObjectInspector( TypeInfoUtils.getTypeInfosFromTypeString("map").get( - 0), new byte[] {(byte) '#', (byte) '\t'}, 0, nullSequence, + 0), new byte[][] {{(byte) '#'}, {(byte) '\t'}}, 0, nullSequence, false, (byte) 0); LazyMap b = (LazyMap) LazyFactory.createLazyObject(oi); byte[] data = new byte[] {'2', '\t', 'd', '\t', 'f', '#', '2', '\t', @@ -377,7 +374,7 @@ public void testLazyStruct() throws Throwable { Text nullSequence = new Text("\\N"); ObjectInspector oi = LazyFactory.createLazyStructInspector(fieldNames, - fieldTypeInfos, new byte[] {' ', ':', '='}, nullSequence, false, + fieldTypeInfos, new byte[][] {{' '}, {':'}, {'='}}, nullSequence, false, false, (byte) 0); LazyStruct o = (LazyStruct) LazyFactory.createLazyObject(oi); @@ -430,7 +427,7 @@ public void testLazyStruct() throws Throwable { // test LastColumnTakesRest oi = LazyFactory.createLazyStructInspector(Arrays.asList(new String[] { "a", "b", "c", "d"}), fieldTypeInfos, - new byte[] {' ', ':', '='}, nullSequence, true, false, (byte) 0); + new byte[][] {{' '}, {':'}, {'='}}, nullSequence, true, false, (byte) 0); o = (LazyStruct) LazyFactory.createLazyObject(oi); data = new Text("\\N a d=\\N:f=g:h has tail"); TestLazyPrimitive.initLazyObject(o, data.getBytes(), 0, data @@ -456,7 +453,7 @@ public void testLazyUnion() throws Throwable { Text nullSequence = new Text("\\N"); ObjectInspector oi = LazyFactory.createLazyObjectInspector(typeInfo, - new byte[] {'^', ':', '='}, 0, nullSequence, false, (byte) 0); + new byte[][] {{'^'}, {':'}, {'='}}, 0, nullSequence, false, (byte) 0); LazyUnion o = (LazyUnion) LazyFactory.createLazyObject(oi); Text data; @@ -695,23 +692,23 @@ private void testNestedinArrayAtLevel(int nestingLevel, SerDeUtils.initializeSerDe(serDe, conf, tableProp, null); //create the serialized string for type - byte[] separators = serDe.serdeParams.getSeparators(); - System.err.println("Using separator " + (char)separators[nestingLevel]); + byte[][] separators = serDe.serdeParams.getSeparators(); + System.err.println("Using separator " + (char)separators[nestingLevel][0]); byte [] serializedRow = null; switch(dtype){ case LIST: - serializedRow = new byte[] {'8',separators[nestingLevel],'9'}; + serializedRow = new byte[] {'8',separators[nestingLevel][0],'9'}; break; case MAP: - byte kvSep = separators[nestingLevel+1]; - byte kvPairSep = separators[nestingLevel]; + byte kvSep = separators[nestingLevel+1][0]; + byte kvPairSep = separators[nestingLevel][0]; serializedRow = new byte[] {'1', kvSep, '1', kvPairSep, '2', kvSep, '2'}; break; case STRUCT: - serializedRow = new byte[] {'8',separators[nestingLevel],'9'}; + serializedRow = new byte[] {'8',separators[nestingLevel][0],'9'}; break; case UNION: - serializedRow = new byte[] {'0',separators[nestingLevel],'9'}; + serializedRow = new byte[] {'0',separators[nestingLevel][0],'9'}; break; default : fail("type not supported by test case");