diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/llap/ext/TestLlapInputSplit.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/llap/ext/TestLlapInputSplit.java
index 654e92b..a154349 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/llap/ext/TestLlapInputSplit.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/llap/ext/TestLlapInputSplit.java
@@ -27,7 +27,7 @@
 import org.apache.hadoop.hive.llap.LlapInputSplit;
 import org.apache.hadoop.hive.llap.Schema;
 import org.apache.hadoop.hive.llap.FieldDesc;
-import org.apache.hadoop.hive.llap.TypeDesc;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 import org.apache.hadoop.mapred.SplitLocationInfo;
 import org.junit.Test;
@@ -46,8 +46,8 @@ public void testWritable() throws Exception {
     };
     ArrayList colDescs = new ArrayList();
-    colDescs.add(new FieldDesc("col1", new TypeDesc(TypeDesc.Type.STRING)));
-    colDescs.add(new FieldDesc("col2", new TypeDesc(TypeDesc.Type.INT)));
+    colDescs.add(new FieldDesc("col1", TypeInfoFactory.stringTypeInfo));
+    colDescs.add(new FieldDesc("col2", TypeInfoFactory.intTypeInfo));
     Schema schema = new Schema(colDescs);
 
     byte[] tokenBytes = new byte[] { 1 };
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlap.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlap.java
index de47412..4cc9045 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlap.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniLlap.java
@@ -62,6 +62,7 @@
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.llap.FieldDesc;
 import org.apache.hadoop.hive.llap.LlapRowRecordReader;
 import org.apache.hadoop.hive.llap.Row;
 import org.apache.hadoop.hive.llap.Schema;
@@ -72,6 +73,12 @@
 import org.apache.hive.jdbc.miniHS2.MiniHS2.MiniClusterType;
 import org.apache.hadoop.hive.llap.LlapBaseInputFormat;
 import org.apache.hadoop.hive.llap.LlapRowInputFormat;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
 import org.datanucleus.ClassLoaderResolver;
 import org.datanucleus.NucleusContext;
@@ -160,6 +167,40 @@ private void createTestTable(String tableName) throws Exception {
     stmt.close();
   }
 
+  private void createTableWithComplexTypes(String tableName) throws Exception {
+    Statement stmt = hs2Conn.createStatement();
+
+    // create table
+    stmt.execute("DROP TABLE IF EXISTS " + tableName);
+    stmt.execute("CREATE TABLE " + tableName
+        + " (c0 int, c1 array<int>, c2 map<int,string>, c3 struct<f1:int,f2:string,f3:array<int>>,"
+        + " c4 array<struct<f1:int,f2:string,f3:array<int>>>)");
+
+    // load data
+    stmt.execute("insert into " + tableName
+        + " select 1"
+        + ", array(1, 2, 3)"
+        + ", map(1, 'one', 2, 'two')"
+        + ", named_struct('f1', 1, 'f2', 'two', 'f3', array(1,2,3))"
+        + ", array(named_struct('f1', 11, 'f2', 'two', 'f3', array(2,3,4)))");
+
+    // Inserting nulls into complex columns doesn't work without this CASE workaround.
+ stmt.execute("insert into " + tableName + + " select 2" + + ", case when 2 = 2 then null else array(1, 2, 3) end" + + ", case when 2 = 2 then null else map(1, 'one', 2, 'two') end" + + ", case when 2 = 2 then null else named_struct('f1', 1, 'f2', 'two', 'f3', array(1,2,3)) end" + + ", case when 2 = 2 then null else array(named_struct('f1', 11, 'f2', 'two', 'f3', array(2,3,4))) end"); + + // TODO: test nested nulls in complex types. Currently blocked by HIVE-16587. + //stmt.execute("insert into " + tableName + // + " select 3" + // + ", array(1, 2, null)" + // + ", map(1, 'one', 2, null)" + // + ", named_struct('f1', cast(null as int), 'f2', cast(null as string), 'f3', array(1,2,null))" + // + ", array(named_struct('f1', 11, 'f2', 'two', 'f3', array(2,3,4)))"); + stmt.close(); + } + @Test(timeout = 60000) public void testLlapInputFormatEndToEnd() throws Exception { createTestTable("testtab1"); @@ -212,6 +253,105 @@ public void testEscapedStrings() throws Exception { assertArrayEquals(new String[] {"val_0", expectedVal1, expectedVal2}, rowCollector.rows.get(2)); } + @Test(timeout = 60000) + public void testComplexTypes() throws Exception { + createTableWithComplexTypes("complex1"); + RowCollector2 rowCollector = new RowCollector2(); + String query = "select * from complex1"; + int rowCount = processQuery(query, 1, rowCollector); + assertEquals(2, rowCount); + + // Verify schema + FieldDesc c0Desc = rowCollector.schema.getColumns().get(0); + assertEquals("complex1.c0", c0Desc.getName()); + assertEquals("int", c0Desc.getTypeInfo().getTypeName()); + + FieldDesc c1Desc = rowCollector.schema.getColumns().get(1); + assertEquals("complex1.c1", c1Desc.getName()); + assertEquals("array", c1Desc.getTypeInfo().getTypeName()); + + FieldDesc c2Desc = rowCollector.schema.getColumns().get(2); + assertEquals("complex1.c2", c2Desc.getName()); + assertEquals("map", c2Desc.getTypeInfo().getTypeName()); + + FieldDesc c3Desc = rowCollector.schema.getColumns().get(3); + assertEquals("complex1.c3", c3Desc.getName()); + assertEquals(Category.STRUCT, c3Desc.getTypeInfo().getCategory()); + verifyStructFieldSchema((StructTypeInfo) c3Desc.getTypeInfo()); + + FieldDesc c4Desc = rowCollector.schema.getColumns().get(4); + assertEquals("complex1.c4", c4Desc.getName()); + assertEquals(Category.LIST, c4Desc.getTypeInfo().getCategory()); + TypeInfo c4ElementType = ((ListTypeInfo) c4Desc.getTypeInfo()).getListElementTypeInfo(); + assertEquals(Category.STRUCT, c4ElementType.getCategory()); + verifyStructFieldSchema((StructTypeInfo) c4ElementType); + + // First row + Object[] rowValues = rowCollector.rows.get(0); + assertEquals(Integer.valueOf(1), ((Integer) rowValues[0])); + + // assertEquals("[1, 2, 3]", rowValues[1]); + List c1Value = (List) rowValues[1]; + assertEquals(3, c1Value.size()); + assertEquals(Integer.valueOf(1), c1Value.get(0)); + assertEquals(Integer.valueOf(2), c1Value.get(1)); + assertEquals(Integer.valueOf(3), c1Value.get(2)); + + // assertEquals("{1=one, 2=two}", rowValues[2]); + Map c2Value = (Map) rowValues[2]; + assertEquals(2, c2Value.size()); + assertEquals("one", c2Value.get(Integer.valueOf(1))); + assertEquals("two", c2Value.get(Integer.valueOf(2))); + + // assertEquals("[1, two, [1, 2, 3]]", rowValues[3]); + List c3Value = (List) rowValues[3]; + assertEquals(Integer.valueOf(1), c3Value.get(0)); + assertEquals("two", c3Value.get(1)); + List f3Value = (List) c3Value.get(2); + assertEquals(Integer.valueOf(1), f3Value.get(0)); + assertEquals(Integer.valueOf(2), f3Value.get(1)); + 
+    assertEquals(Integer.valueOf(3), f3Value.get(2));
+
+    // assertEquals("[[11, two, [2, 3, 4]]]", rowValues[4]);
+    List c4Value = (List) rowValues[4];
+    assertEquals(1, c4Value.size());
+    List c4Element = (List) c4Value.get(0);
+    assertEquals(Integer.valueOf(11), c4Element.get(0));
+    assertEquals("two", c4Element.get(1));
+    f3Value = (List) c4Element.get(2);
+    assertEquals(3, f3Value.size());
+    assertEquals(Integer.valueOf(2), f3Value.get(0));
+    assertEquals(Integer.valueOf(3), f3Value.get(1));
+    assertEquals(Integer.valueOf(4), f3Value.get(2));
+
+    // Second row
+    rowValues = rowCollector.rows.get(1);
+    assertEquals(Integer.valueOf(2), ((Integer) rowValues[0]));
+    assertEquals(null, rowValues[1]);
+    assertEquals(null, rowValues[2]);
+    assertEquals(null, rowValues[3]);
+    assertEquals(null, rowValues[4]);
+  }
+
+  private void verifyStructFieldSchema(StructTypeInfo structType) {
+    assertEquals("f1", structType.getAllStructFieldNames().get(0));
+    TypeInfo f1Type = structType.getStructFieldTypeInfo("f1");
+    assertEquals(Category.PRIMITIVE, f1Type.getCategory());
+    assertEquals(PrimitiveCategory.INT, ((PrimitiveTypeInfo) f1Type).getPrimitiveCategory());
+
+    assertEquals("f2", structType.getAllStructFieldNames().get(1));
+    TypeInfo f2Type = structType.getStructFieldTypeInfo("f2");
+    assertEquals(Category.PRIMITIVE, f2Type.getCategory());
+    assertEquals(PrimitiveCategory.STRING, ((PrimitiveTypeInfo) f2Type).getPrimitiveCategory());
+
+    assertEquals("f3", structType.getAllStructFieldNames().get(2));
+    TypeInfo f3Type = structType.getStructFieldTypeInfo("f3");
+    assertEquals(Category.LIST, f3Type.getCategory());
+    assertEquals(
+        PrimitiveCategory.INT,
+        ((PrimitiveTypeInfo) ((ListTypeInfo) f3Type).getListElementTypeInfo()).getPrimitiveCategory());
+  }
+
   private interface RowProcessor {
     void process(Row row);
   }
@@ -235,6 +375,26 @@ public void process(Row row) {
     }
   }
 
+  // Save the actual values from each row as opposed to the String representation.
+  private static class RowCollector2 implements RowProcessor {
+    ArrayList<Object[]> rows = new ArrayList<Object[]>();
+    Schema schema = null;
+    int numColumns = 0;
+
+    public void process(Row row) {
+      if (schema == null) {
+        schema = row.getSchema();
+        numColumns = schema.getColumns().size();
+      }
+
+      Object[] arr = new Object[numColumns];
+      for (int idx = 0; idx < numColumns; ++idx) {
+        arr[idx] = row.getValue(idx);
+      }
+      rows.add(arr);
+    }
+  }
+
   private int processQuery(String query, int numSplits, RowProcessor rowProcessor) throws Exception {
     String url = miniHS2.getJdbcURL();
     String user = System.getProperty("user.name");
diff --git a/llap-client/src/java/org/apache/hadoop/hive/llap/LlapRowRecordReader.java b/llap-client/src/java/org/apache/hadoop/hive/llap/LlapRowRecordReader.java
index ee92f3e..e3c0955 100644
--- a/llap-client/src/java/org/apache/hadoop/hive/llap/LlapRowRecordReader.java
+++ b/llap-client/src/java/org/apache/hadoop/hive/llap/LlapRowRecordReader.java
@@ -21,7 +21,11 @@
 import com.google.common.base.Preconditions;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Properties;
 
 import org.apache.hadoop.conf.Configuration;
@@ -32,17 +36,23 @@
 import org.apache.hadoop.mapred.Reporter;
 import org.apache.hadoop.mapred.JobConf;
 
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.llap.Row;
 import org.apache.hadoop.hive.llap.FieldDesc;
 import org.apache.hadoop.hive.llap.Schema;
-import org.apache.hadoop.hive.llap.TypeDesc;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
 import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@@ -111,27 +121,7 @@ public boolean next(NullWritable key, Row value) throws IOException {
     try {
       StructObjectInspector rowOI = (StructObjectInspector) serde.getObjectInspector();
       rowObj = serde.deserialize(textData);
-      List<? extends StructField> colFields = rowOI.getAllStructFieldRefs();
-      for (int idx = 0; idx < colFields.size(); ++idx) {
-        StructField field = colFields.get(idx);
-        Object colValue = rowOI.getStructFieldData(rowObj, field);
-        Preconditions.checkState(field.getFieldObjectInspector().getCategory() == Category.PRIMITIVE,
-            "Cannot handle non-primitive column type " + field.getFieldObjectInspector().getTypeName());
-
-        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) field.getFieldObjectInspector();
-        // char/varchar special cased here since the row record handles them using Text
-        switch (poi.getPrimitiveCategory()) {
-          case CHAR:
-            value.setValue(idx, ((HiveCharWritable) poi.getPrimitiveWritableObject(colValue)).getPaddedValue());
-            break;
-          case VARCHAR:
-            value.setValue(idx, ((HiveVarcharWritable) poi.getPrimitiveWritableObject(colValue)).getTextValue());
-            break;
-          default:
-            value.setValue(idx, (Writable) poi.getPrimitiveWritableObject(colValue));
-            break;
-        }
-      }
+      setRowFromStruct(value, rowObj, rowOI);
     } catch (SerDeException err) {
       if (LOG.isDebugEnabled()) {
         LOG.debug("Error deserializing row from text: " + textData);
@@ -147,6 +137,96 @@ public Schema getSchema() {
     return schema;
   }
 
+  static Object convertPrimitive(Object val, PrimitiveObjectInspector poi) {
+    switch (poi.getPrimitiveCategory()) {
+      // Save char/varchar as string
+      case CHAR:
+        return ((HiveChar) poi.getPrimitiveJavaObject(val)).getPaddedValue();
+      case VARCHAR:
+        return ((HiveVarchar) poi.getPrimitiveJavaObject(val)).toString();
+      case DECIMAL:
+        return ((HiveDecimal) poi.getPrimitiveJavaObject(val)).bigDecimalValue();
+      default:
+        return poi.getPrimitiveJavaObject(val);
+    }
+  }
+
+  static Object convertValue(Object val, ObjectInspector oi) {
+    if (val == null) {
+      return null;
+    }
+
+    Object convertedVal = null;
+    ObjectInspector.Category oiCategory = oi.getCategory();
+    switch (oiCategory) {
+      case PRIMITIVE:
+        convertedVal = convertPrimitive(val, (PrimitiveObjectInspector) oi);
+        break;
+      case LIST:
+        ListObjectInspector loi = (ListObjectInspector) oi;
+        int listSize = loi.getListLength(val);
+        // Per ListObjectInspector.getListLength(), -1 length means null list.
+        if (listSize < 0) {
+          return null;
+        }
+        List convertedList = new ArrayList(listSize);
+        ObjectInspector listElementOI = loi.getListElementObjectInspector();
+        for (int idx = 0; idx < listSize; ++idx) {
+          convertedList.add(convertValue(loi.getListElement(val, idx), listElementOI));
+        }
+        convertedVal = convertedList;
+        break;
+      case MAP:
+        MapObjectInspector moi = (MapObjectInspector) oi;
+        int mapSize = moi.getMapSize(val);
+        // Per MapObjectInspector.getMapSize(), -1 length means null map.
+ if (mapSize < 0) { + return null; + } + Map convertedMap = new LinkedHashMap(mapSize); + ObjectInspector mapKeyOI = moi.getMapKeyObjectInspector(); + ObjectInspector mapValOI = moi.getMapValueObjectInspector(); + Map mapCol = moi.getMap(val); + for (Object mapKey : mapCol.keySet()) { + Object convertedMapKey = convertValue(mapKey, mapKeyOI); + Object convertedMapVal = convertValue(mapCol.get(mapKey), mapValOI); + convertedMap.put(convertedMapKey, convertedMapVal); + } + convertedVal = convertedMap; + break; + case STRUCT: + StructObjectInspector soi = (StructObjectInspector) oi; + List convertedRow = new ArrayList(); + for (StructField structField : soi.getAllStructFieldRefs()) { + Object convertedFieldValue = convertValue( + soi.getStructFieldData(val, structField), + structField.getFieldObjectInspector()); + convertedRow.add(convertedFieldValue); + } + convertedVal = convertedRow; + break; + default: + throw new IllegalArgumentException("Cannot convert type " + oiCategory); + } + + return convertedVal; + } + + static void setRowFromStruct(Row row, Object structVal, StructObjectInspector soi) { + Schema structSchema = row.getSchema(); + // Add struct field data to the Row + List fieldDescs = structSchema.getColumns(); + for (int idx = 0; idx < fieldDescs.size(); ++idx) { + FieldDesc fieldDesc = fieldDescs.get(idx); + StructField structField = soi.getStructFieldRef(fieldDesc.getName()); + + Object convertedFieldValue = convertValue( + soi.getStructFieldData(structVal, structField), + structField.getFieldObjectInspector()); + row.setValue(idx, convertedFieldValue); + } + } + protected AbstractSerDe initSerDe(Configuration conf) throws SerDeException { Properties props = new Properties(); StringBuffer columnsBuffer = new StringBuffer(); @@ -158,7 +238,7 @@ protected AbstractSerDe initSerDe(Configuration conf) throws SerDeException { typesBuffer.append(','); } columnsBuffer.append(colDesc.getName()); - typesBuffer.append(colDesc.getTypeDesc().toString()); + typesBuffer.append(colDesc.getTypeInfo().toString()); isFirst = false; } String columns = columnsBuffer.toString(); diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/FieldDesc.java b/llap-common/src/java/org/apache/hadoop/hive/llap/FieldDesc.java index 9621978..19f482d 100644 --- a/llap-common/src/java/org/apache/hadoop/hive/llap/FieldDesc.java +++ b/llap-common/src/java/org/apache/hadoop/hive/llap/FieldDesc.java @@ -21,43 +21,45 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; + +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; import org.apache.hadoop.io.Writable; public class FieldDesc implements Writable { private String name; - private TypeDesc typeDesc; + private TypeInfo typeInfo; public FieldDesc() { - typeDesc = new TypeDesc(); } - public FieldDesc(String name, TypeDesc typeDesc) { + public FieldDesc(String name, TypeInfo typeInfo) { this.name = name; - this.typeDesc = typeDesc; + this.typeInfo = typeInfo; } public String getName() { return name; } - public TypeDesc getTypeDesc() { - return typeDesc; + public TypeInfo getTypeInfo() { + return typeInfo; } @Override public String toString() { - return getName() + ":" + getTypeDesc().toString(); + return getName() + ":" + getTypeInfo().toString(); } @Override public void write(DataOutput out) throws IOException { out.writeUTF(name); - typeDesc.write(out); + out.writeUTF(typeInfo.toString()); } @Override public void readFields(DataInput in) throws IOException { name = 
     name = in.readUTF();
-    typeDesc.readFields(in);
+    typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(in.readUTF());
   }
 }
\ No newline at end of file
diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/Row.java b/llap-common/src/java/org/apache/hadoop/hive/llap/Row.java
index a84fadc..40a6ef5 100644
--- a/llap-common/src/java/org/apache/hadoop/hive/llap/Row.java
+++ b/llap-common/src/java/org/apache/hadoop/hive/llap/Row.java
@@ -17,150 +17,184 @@
  */
 package org.apache.hadoop.hive.llap;
 
+import java.math.BigDecimal;
+import java.sql.Date;
+import java.sql.Timestamp;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
 import com.google.common.base.Preconditions;
 
-import org.apache.hadoop.hive.serde2.io.ByteWritable;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
-import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
-import org.apache.hadoop.hive.serde2.io.ShortWritable;
-import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-
-import org.apache.hadoop.io.BooleanWritable;
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.FloatWritable;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.io.Writable;
-
-
 public class Row {
   private final Schema schema;
-  private final Writable[] colValues;
-  private final boolean[] nullIndicators;
+  private final Object[] colValues;
   private Map<String, Integer> nameToIndexMapping;
 
   public Row(Schema schema) {
     this.schema = schema;
-    this.colValues = new Writable[schema.getColumns().size()];
-    this.nullIndicators = new boolean[schema.getColumns().size()];
+    this.colValues = new Object[schema.getColumns().size()];
     this.nameToIndexMapping = new HashMap<String, Integer>(schema.getColumns().size());
 
     List<FieldDesc> colDescs = schema.getColumns();
     for (int idx = 0; idx < colDescs.size(); ++idx) {
       FieldDesc colDesc = colDescs.get(idx);
       nameToIndexMapping.put(colDesc.getName(), idx);
-      colValues[idx] = createWritableForType(colDesc.getTypeDesc());
     }
   }
 
-  public Writable getValue(int colIndex) {
-    if (nullIndicators[colIndex]) {
-      return null;
-    }
+  public Object getValue(int colIndex) {
     return colValues[colIndex];
   }
 
-  public Writable getValue(String colName) {
+  public Object getValue(String colName) {
     Integer idx = nameToIndexMapping.get(colName);
     Preconditions.checkArgument(idx != null);
     return getValue(idx);
   }
 
+  public Boolean getBoolean(int idx) {
+    return (Boolean) getValue(idx);
+  }
+
+  public Boolean getBoolean(String colName) {
+    return (Boolean) getValue(colName);
+  }
+
+  public Byte getByte(int idx) {
+    return (Byte) getValue(idx);
+  }
+
+  public Byte getByte(String colName) {
+    return (Byte) getValue(colName);
+  }
+
+  public Short getShort(int idx) {
+    return (Short) getValue(idx);
+  }
+
+  public Short getShort(String colName) {
+    return (Short) getValue(colName);
+  }
+
+  public Integer getInt(int idx) {
+    return (Integer) getValue(idx);
+  }
+
+  public Integer getInt(String colName) {
+    return (Integer) getValue(colName);
+  }
+
+  public Long getLong(int idx) {
+    return (Long) getValue(idx);
+  }
+
+  public Long getLong(String colName) {
+    return (Long) getValue(colName);
+  }
+
+  public Float getFloat(int idx) {
+    return (Float) getValue(idx);
+  }
+
+  public Float getFloat(String colName) {
+    return (Float) getValue(colName);
+  }
+
+  public Double getDouble(int idx) {
+    return (Double) getValue(idx);
+  }
+
+  public Double getDouble(String colName) {
+    return (Double) getValue(colName);
+  }
+
+  public String getString(int idx) {
+    return (String) getValue(idx);
+  }
+
+  public String getString(String colName) {
+    return (String) getValue(colName);
+  }
+
+  public Date getDate(int idx) {
+    return (Date) getValue(idx);
+  }
+
+  public Date getDate(String colName) {
+    return (Date) getValue(colName);
+  }
+
+  public Timestamp getTimestamp(int idx) {
+    return (Timestamp) getValue(idx);
+  }
+
+  public Timestamp getTimestamp(String colName) {
+    return (Timestamp) getValue(colName);
+  }
+
+  public byte[] getBytes(int idx) {
+    return (byte[]) getValue(idx);
+  }
+
+  public byte[] getBytes(String colName) {
+    return (byte[]) getValue(colName);
+  }
+
+  public BigDecimal getDecimal(int idx) {
+    return (BigDecimal) getValue(idx);
+  }
+
+  public BigDecimal getDecimal(String colName) {
+    return (BigDecimal) getValue(colName);
+  }
+
+  public List getList(int idx) {
+    return (List) getValue(idx);
+  }
+
+  public List getList(String colName) {
+    return (List) getValue(colName);
+  }
+
+  public Map getMap(int idx) {
+    return (Map) getValue(idx);
+  }
+
+  public Map getMap(String colName) {
+    return (Map) getValue(colName);
+  }
+
+  // Struct value is simply a list of values.
+  // The schema can be used to map the field name to the position in the list.
+  public List getStruct(int idx) {
+    return (List) getValue(idx);
+  }
+
+  public List getStruct(String colName) {
+    return (List) getValue(colName);
+  }
+
   public Schema getSchema() {
     return schema;
   }
 
-  void setValue(int colIdx, Writable value) {
-    Preconditions.checkArgument(colIdx <= schema.getColumns().size());
-
-    if (value == null) {
-      nullIndicators[colIdx] = true;
-    } else {
-      nullIndicators[colIdx] = false;
-      FieldDesc colDesc = schema.getColumns().get(colIdx);
-      switch (colDesc.getTypeDesc().getType()) {
-        case BOOLEAN:
-          ((BooleanWritable) colValues[colIdx]).set(((BooleanWritable) value).get());
-          break;
-        case TINYINT:
-          ((ByteWritable) colValues[colIdx]).set(((ByteWritable) value).get());
-          break;
-        case SMALLINT:
-          ((ShortWritable) colValues[colIdx]).set(((ShortWritable) value).get());
-          break;
-        case INT:
-          ((IntWritable) colValues[colIdx]).set(((IntWritable) value).get());
-          break;
-        case BIGINT:
-          ((LongWritable) colValues[colIdx]).set(((LongWritable) value).get());
-          break;
-        case FLOAT:
-          ((FloatWritable) colValues[colIdx]).set(((FloatWritable) value).get());
-          break;
-        case DOUBLE:
-          ((DoubleWritable) colValues[colIdx]).set(((DoubleWritable) value).get());
-          break;
-        case STRING:
-          // Just handle char/varchar as Text
-        case CHAR:
-        case VARCHAR:
-          ((Text) colValues[colIdx]).set((Text) value);
-          break;
-        case DATE:
-          ((DateWritable) colValues[colIdx]).set((DateWritable) value);
-          break;
-        case TIMESTAMP:
-          ((TimestampWritable) colValues[colIdx]).set((TimestampWritable) value);
-          break;
-        case BINARY:
-          ((BytesWritable) colValues[colIdx]).set(((BytesWritable) value));
-          break;
-        case DECIMAL:
-          ((HiveDecimalWritable) colValues[colIdx]).set((HiveDecimalWritable) value);
-          break;
-      }
-    }
+  void setValue(int colIdx, Object obj) {
+    colValues[colIdx] = obj;
   }
 
-  private Writable createWritableForType(TypeDesc typeDesc) {
-    switch (typeDesc.getType()) {
-      case BOOLEAN:
-        return new BooleanWritable();
-      case TINYINT:
-        return new ByteWritable();
-      case SMALLINT:
-        return new ShortWritable();
-      case INT:
-        return new IntWritable();
-      case BIGINT:
-        return new LongWritable();
-      case FLOAT:
-        return new FloatWritable();
-      case DOUBLE:
-        return new DoubleWritable();
-      case STRING:
-        // Just handle char/varchar as Text
-      case CHAR:
-      case VARCHAR:
-        return new Text();
-      case DATE:
-        return new DateWritable();
-      case TIMESTAMP:
-        return new TimestampWritable();
-      case BINARY:
-        return new BytesWritable();
-      case DECIMAL:
-        return new HiveDecimalWritable();
-      default:
-        throw new RuntimeException("Cannot create writable for " + typeDesc.getType());
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("[");
+    for (int idx = 0; idx < schema.getColumns().size(); ++idx) {
+      if (idx > 0) {
+        sb.append(", ");
+      }
+      Object val = getValue(idx);
+      sb.append(val == null ? "null" : val.toString());
     }
+    sb.append("]");
+    return sb.toString();
   }
 }
diff --git a/llap-common/src/java/org/apache/hadoop/hive/llap/TypeDesc.java b/llap-common/src/java/org/apache/hadoop/hive/llap/TypeDesc.java
deleted file mode 100644
index dda5928..0000000
--- a/llap-common/src/java/org/apache/hadoop/hive/llap/TypeDesc.java
+++ /dev/null
@@ -1,108 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.llap;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import org.apache.hadoop.io.Writable;
-
-public class TypeDesc implements Writable {
-  public static enum Type {
-    BOOLEAN,
-    TINYINT,
-    SMALLINT,
-    INT,
-    BIGINT,
-    FLOAT,
-    DOUBLE,
-    STRING,
-    CHAR,
-    VARCHAR,
-    DATE,
-    TIMESTAMP,
-    BINARY,
-    DECIMAL,
-  }
-
-  private TypeDesc.Type type;
-  private int precision;
-  private int scale;
-
-  // For types with no type qualifiers
-  public TypeDesc(TypeDesc.Type type) {
-    this(type, 0, 0);
-  }
-
-  // For decimal types
-  public TypeDesc(TypeDesc.Type type, int precision, int scale) {
-    this.type = type;
-    this.precision = precision;
-    this.scale = scale;
-  }
-
-  // For char/varchar types
-  public TypeDesc(TypeDesc.Type type, int precision) {
-    this(type, precision, 0);
-  }
-
-  // Should be used for serialization only
-  public TypeDesc() {
-    this(TypeDesc.Type.INT, 0, 0);
-  }
-
-  public TypeDesc.Type getType() {
-    return type;
-  }
-
-  public int getPrecision() {
-    return precision;
-  }
-
-  public int getScale() {
-    return scale;
-  }
-
-  @Override
-  public String toString() {
-    switch (type) {
-      case DECIMAL:
-        return type.name().toLowerCase() + "(" + precision + "," + scale + ")";
-      case CHAR:
-      case VARCHAR:
-        return type.name().toLowerCase() + "(" + precision + ")";
-      default:
-        return type.name().toLowerCase();
-    }
-  }
-
-  @Override
-  public void write(DataOutput out) throws IOException {
-    out.writeUTF(type.name());
-    out.writeInt(precision);
-    out.writeInt(scale);
-  }
-
-  @Override
-  public void readFields(DataInput in) throws IOException {
-    type = TypeDesc.Type.valueOf(in.readUTF());
-    precision = in.readInt();
-    scale = in.readInt();
-  }
-}
\ No newline at end of file
diff --git a/llap-common/src/test/org/apache/hadoop/hive/llap/TestRow.java b/llap-common/src/test/org/apache/hadoop/hive/llap/TestRow.java
index d4e68f4..37e934d 100644
--- a/llap-common/src/test/org/apache/hadoop/hive/llap/TestRow.java
+++ b/llap-common/src/test/org/apache/hadoop/hive/llap/TestRow.java
@@ -23,8 +23,7 @@
 
 import org.apache.commons.lang.RandomStringUtils;
 
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.Text;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 
 import org.junit.After;
 import org.junit.Before;
@@ -42,16 +41,14 @@ public void testUsage() {
     Random rand = new Random();
     int iterations = 100;
 
-    Text col0 = new Text();
-    IntWritable col1 = new IntWritable();
 
     for (int idx = 0; idx < iterations; ++idx) {
       // Set the row values
       boolean isNullCol0 = (rand.nextDouble() <= 0.25);
-      col0.set(RandomStringUtils.random(10));
+      String col0 = RandomStringUtils.random(10);
       row.setValue(0, isNullCol0 ? null : col0);
 
       boolean isNullCol1 = (rand.nextDouble() <= 0.25);
-      col1.set(rand.nextInt());
+      Integer col1 = Integer.valueOf(rand.nextInt());
       row.setValue(1, isNullCol1 ? null : col1);
 
       // Validate the row values
@@ -60,7 +57,6 @@ public void testUsage() {
         assertTrue(row.getValue("col0") == null);
       } else {
         assertTrue(row.getValue(0) != null);
-        assertTrue(col0 != row.getValue(0));
         assertEquals(col0, row.getValue(0));
         assertEquals(col0, row.getValue("col0"));
       }
@@ -70,7 +66,6 @@ public void testUsage() {
         assertTrue(row.getValue("col1") == null);
       } else {
         assertTrue(row.getValue(1) != null);
-        assertTrue(col1 != row.getValue(1));
         assertEquals(col1, row.getValue(1));
         assertEquals(col1, row.getValue("col1"));
       }
@@ -81,10 +76,10 @@ private Schema createTestSchema() {
     List colDescs = new ArrayList();
 
     colDescs.add(new FieldDesc("col0",
-        new TypeDesc(TypeDesc.Type.STRING)));
+        TypeInfoFactory.stringTypeInfo));
 
     colDescs.add(new FieldDesc("col1",
-        new TypeDesc(TypeDesc.Type.INT)));
+        TypeInfoFactory.intTypeInfo));
 
     Schema schema = new Schema(colDescs);
     return schema;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
index 9ddbd7e..868eec7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFGetSplits.java
@@ -46,7 +46,6 @@
 import org.apache.hadoop.hive.llap.NotTezEventHelper;
 import org.apache.hadoop.hive.llap.Schema;
 import org.apache.hadoop.hive.llap.SubmitWorkInfo;
-import org.apache.hadoop.hive.llap.TypeDesc;
 import org.apache.hadoop.hive.llap.coordinator.LlapCoordinator;
 import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.QueryIdentifierProto;
 import org.apache.hadoop.hive.llap.daemon.rpc.LlapDaemonProtocolProtos.SignableVertexSpec;
@@ -82,12 +81,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.SplitLocationInfo;
@@ -526,76 +520,13 @@ private String getSha(Path localFile, Configuration conf) throws IOException,
     }
   }
 
-  private TypeDesc convertTypeString(String typeString) throws HiveException {
-    TypeDesc typeDesc;
-    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeString);
-    Preconditions.checkState(
-        typeInfo.getCategory() == ObjectInspector.Category.PRIMITIVE,
-        "Unsupported non-primitive type " + typeString);
-
-    switch (((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory()) {
-      case BOOLEAN:
-        typeDesc = new TypeDesc(TypeDesc.Type.BOOLEAN);
-        break;
-      case BYTE:
-        typeDesc = new TypeDesc(TypeDesc.Type.TINYINT);
-        break;
-      case SHORT:
-        typeDesc = new TypeDesc(TypeDesc.Type.SMALLINT);
-        break;
-      case INT:
-        typeDesc = new TypeDesc(TypeDesc.Type.INT);
-        break;
-      case LONG:
-        typeDesc = new TypeDesc(TypeDesc.Type.BIGINT);
-        break;
-      case FLOAT:
-        typeDesc = new TypeDesc(TypeDesc.Type.FLOAT);
-        break;
-      case DOUBLE:
-        typeDesc = new TypeDesc(TypeDesc.Type.DOUBLE);
-        break;
-      case STRING:
-        typeDesc = new TypeDesc(TypeDesc.Type.STRING);
-        break;
-      case CHAR:
-        CharTypeInfo charTypeInfo = (CharTypeInfo) typeInfo;
-        typeDesc = new TypeDesc(TypeDesc.Type.CHAR, charTypeInfo.getLength());
-        break;
-      case VARCHAR:
-        VarcharTypeInfo varcharTypeInfo = (VarcharTypeInfo) typeInfo;
-        typeDesc = new TypeDesc(TypeDesc.Type.VARCHAR,
-            varcharTypeInfo.getLength());
-        break;
-      case DATE:
-        typeDesc = new TypeDesc(TypeDesc.Type.DATE);
-        break;
-      case TIMESTAMP:
-        typeDesc = new TypeDesc(TypeDesc.Type.TIMESTAMP);
-        break;
-      case BINARY:
-        typeDesc = new TypeDesc(TypeDesc.Type.BINARY);
-        break;
-      case DECIMAL:
-        DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
-        typeDesc = new TypeDesc(TypeDesc.Type.DECIMAL,
-            decimalTypeInfo.getPrecision(), decimalTypeInfo.getScale());
-        break;
-      default:
-        throw new HiveException("Unsupported type " + typeString);
-    }
-
-    return typeDesc;
-  }
-
   private Schema convertSchema(Object obj) throws HiveException {
     org.apache.hadoop.hive.metastore.api.Schema schema = (org.apache.hadoop.hive.metastore.api.Schema) obj;
     List colDescs = new ArrayList();
     for (FieldSchema fs : schema.getFieldSchemas()) {
       String colName = fs.getName();
       String typeString = fs.getType();
-      TypeDesc typeDesc = convertTypeString(typeString);
-      colDescs.add(new FieldDesc(colName, typeDesc));
+      colDescs.add(new FieldDesc(colName, TypeInfoUtils.getTypeInfoFromTypeString(typeString)));
     }
     Schema Schema = new Schema(colDescs);
     return Schema;
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
index b003eb8..bb79857 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
@@ -60,7 +60,6 @@
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
-import org.apache.hadoop.hive.llap.TypeDesc;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.SerializationUtilities;
 import org.apache.hadoop.hive.ql.exec.Utilities;
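Not part of the patch: a minimal consumer-side sketch of how client code might read a Row through the Object-based accessors and the TypeInfo-backed Schema introduced above. The RowPrinter class name is hypothetical and exists only for illustration; it assumes a Row obtained from LlapRowRecordReader/LlapRowInputFormat, as in TestJdbcWithMiniLlap.processQuery.

import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.llap.FieldDesc;
import org.apache.hadoop.hive.llap.Row;
import org.apache.hadoop.hive.llap.Schema;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;

public class RowPrinter {
  // Walks one Row using the accessors added by this patch. Primitives come back as plain Java
  // objects (Integer, String, BigDecimal, ...), arrays as java.util.List, maps as java.util.Map,
  // and structs as a List whose element order follows StructTypeInfo.getAllStructFieldNames().
  public static void printRow(Row row) {
    Schema schema = row.getSchema();
    List<FieldDesc> columns = schema.getColumns();
    for (int idx = 0; idx < columns.size(); ++idx) {
      FieldDesc col = columns.get(idx);
      switch (col.getTypeInfo().getCategory()) {
        case LIST:
          List<?> listVal = row.getList(idx);
          System.out.println(col.getName() + " (list) = " + listVal);
          break;
        case MAP:
          Map<?, ?> mapVal = row.getMap(idx);
          System.out.println(col.getName() + " (map) = " + mapVal);
          break;
        case STRUCT:
          StructTypeInfo structType = (StructTypeInfo) col.getTypeInfo();
          List<?> structVal = row.getStruct(idx);
          System.out.println(col.getName() + " " + structType.getAllStructFieldNames() + " = " + structVal);
          break;
        default:
          // Primitive column; the typed getters (getInt, getString, getDecimal, ...) could be
          // used here instead when the column type is known in advance.
          System.out.println(col.getName() + " = " + row.getValue(idx));
          break;
      }
    }
  }
}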