Index: metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
===================================================================
--- metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java	(revision 1580963)
+++ metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java	(working copy)
@@ -957,11 +957,16 @@
     String partString = "";
     String partStringSep = "";
+    String partTypesString = "";
+    String partTypesStringSep = "";
     for (FieldSchema partKey : partitionKeys) {
       partString = partString.concat(partStringSep);
       partString = partString.concat(partKey.getName());
+      partTypesString = partTypesString.concat(partTypesStringSep);
+      partTypesString = partTypesString.concat(partKey.getType());
       if (partStringSep.length() == 0) {
         partStringSep = "/";
+        partTypesStringSep = ":";
       }
     }
     if (partString.length() > 0) {
@@ -969,6 +974,10 @@
           .setProperty(
               org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS,
               partString);
+      schema
+          .setProperty(
+              org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES,
+              partTypesString);
     }
 
     if (parameters != null) {
Index: metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java
===================================================================
--- metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java	(revision 1580963)
+++ metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/hive_metastoreConstants.java	(working copy)
@@ -66,6 +66,8 @@
   public static final String META_TABLE_SERDE = "serde";
 
   public static final String META_TABLE_PARTITION_COLUMNS = "partition_columns";
+
+  public static final String META_TABLE_PARTITION_COLUMN_TYPES = "partition_columns.types";
 
   public static final String FILE_INPUT_FORMAT = "file.inputformat";
 
Index: ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java
===================================================================
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java	(revision 1580963)
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java	(working copy)
@@ -202,7 +202,7 @@
     Assert.assertEquals("Field size should be 9", colCount, fieldRefs.size());
 
     // Create the context
-    VectorizedRowBatchCtx ctx = new VectorizedRowBatchCtx(oi, oi, serDe, null);
+    VectorizedRowBatchCtx ctx = new VectorizedRowBatchCtx(oi, oi, serDe, null, null);
     VectorizedRowBatch batch = ctx.createVectorizedRowBatch();
     VectorizedBatchUtil.SetNoNullFields(true, batch);
Index: ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(revision 1580963)
+++ ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java	(working copy)
@@ -8588,10 +8588,8 @@
       // Finally add the partitioning columns
       for (FieldSchema part_col : tab.getPartCols()) {
         LOG.trace("Adding partition col: " + part_col);
-        // TODO: use the right type by calling part_col.getType() instead of
-        // String.class. See HIVE-3059.
        rwsch.put(alias, part_col.getName(), new ColumnInfo(part_col.getName(),
-            TypeInfoFactory.stringTypeInfo, alias, true));
+            TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()), alias, true));
       }
 
       // put all virutal columns in RowResolver.
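
Note (illustrative, not part of the patch): the two table properties written by MetaStoreUtils above are positional twins: partition_columns holds the '/'-separated column names and the new partition_columns.types holds the ':'-separated type names in the same order. The standalone sketch below, with a hypothetical parsePartitionColumns helper, shows how a consumer is expected to zip the two properties back together; the hunks for MapOperator, FetchOperator and VectorizedRowBatchCtx further down do the equivalent inline.

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Properties;

public class PartitionColumnTypesExample {
  // Hypothetical helper, shown only to illustrate the property format.
  static Map<String, String> parsePartitionColumns(Properties tblProps) {
    String pcols = tblProps.getProperty("partition_columns");
    String pcolTypes = tblProps.getProperty("partition_columns.types");
    Map<String, String> colToType = new LinkedHashMap<String, String>();
    if (pcols == null || pcols.isEmpty() || pcolTypes == null) {
      return colToType;                           // nothing to parse
    }
    String[] names = pcols.trim().split("/");     // e.g. "ds/hr"
    String[] types = pcolTypes.trim().split(":"); // e.g. "string:int"
    if (names.length != types.length) {
      throw new IllegalStateException("partition_columns and partition_columns.types disagree");
    }
    for (int i = 0; i < names.length; i++) {
      colToType.put(names[i], types[i]);          // keep declaration order
    }
    return colToType;
  }

  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("partition_columns", "ds/hr");
    props.setProperty("partition_columns.types", "string:int");
    System.out.println(parsePartitionColumns(props)); // {ds=string, hr=int}
  }
}
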
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java	(revision 1580963)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java	(working copy)
@@ -53,6 +53,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
@@ -179,7 +180,7 @@
 
     PartitionDesc pd = ctx.partDesc;
     TableDesc td = pd.getTableDesc();
-    
+
     MapOpCtx opCtx = new MapOpCtx();
     // Use table properties in case of unpartitioned tables,
     // and the union of table properties and partition properties, with partition
@@ -203,28 +204,43 @@
 
     opCtx.partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(
         partRawRowObjectInspector, opCtx.tblRawRowObjectInspector);
-    
+
     // Next check if this table has partitions and if so
     // get the list of partition names as well as allocate
     // the serdes for the partition columns
     String pcols = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
-    // Log LOG = LogFactory.getLog(MapOperator.class.getName());
+
     if (pcols != null && pcols.length() > 0) {
       String[] partKeys = pcols.trim().split("/");
+      String pcolTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
+      String[] partKeyTypes = pcolTypes.trim().split(":");
+
+      if (partKeys.length != partKeyTypes.length) {
+        throw new HiveException("Internal error : partKeys length, " +partKeys.length +
+            " not the same as partKeyTypes length, " + partKeyTypes.length);
+      }
+
       List<String> partNames = new ArrayList<String>(partKeys.length);
       Object[] partValues = new Object[partKeys.length];
       List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>(partKeys.length);
+
       for (int i = 0; i < partKeys.length; i++) {
         String key = partKeys[i];
         partNames.add(key);
+        ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector
+            (TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
+
         // Partitions do not exist for this table
         if (partSpec == null) {
           // for partitionless table, initialize partValue to null
           partValues[i] = null;
         } else {
-          partValues[i] = new Text(partSpec.get(key));
+          partValues[i] =
+              ObjectInspectorConverters.
+                  getConverter(PrimitiveObjectInspectorFactory.
+                      javaStringObjectInspector, oi).convert(partSpec.get(key));
         }
-        partObjectInspectors.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+        partObjectInspectors.add(oi);
       }
       opCtx.rowWithPart = new Object[] {null, partValues};
       opCtx.partObjectInspector = ObjectInspectorFactory
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java	(revision 1580963)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java	(working copy)
@@ -59,6 +59,10 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.mapred.InputFormat;
@@ -245,9 +249,12 @@
     String pcols = partition.getTableDesc().getProperties().getProperty(
         org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
     String[] partKeys = pcols.trim().split("/");
-    row[1] = createPartValue(partKeys, partition.getPartSpec());
+    String pcolTypes = partition.getTableDesc().getProperties().getProperty(
+        org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
+    String[] partKeyTypes = pcolTypes.trim().split(":");
+    row[1] = createPartValue(partKeys, partition.getPartSpec(), partKeyTypes);
 
-    return createRowInspector(getStructOIFrom(partitionOI), partKeys);
+    return createRowInspector(getStructOIFrom(partitionOI), partKeys, partKeyTypes);
   }
 
   private StructObjectInspector getRowInspectorFromPartitionedTable(TableDesc table)
@@ -257,8 +264,11 @@
     String pcols = table.getProperties().getProperty(
         org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
     String[] partKeys = pcols.trim().split("/");
+    String pcolTypes = table.getProperties().getProperty(
+        org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
+    String[] partKeyTypes = pcolTypes.trim().split(":");
     row[1] = null;
-    return createRowInspector(getStructOIFrom(serde.getObjectInspector()), partKeys);
+    return createRowInspector(getStructOIFrom(serde.getObjectInspector()), partKeys, partKeyTypes);
   }
 
   private StructObjectInspector getStructOIFrom(ObjectInspector current) throws SerDeException {
@@ -276,13 +286,16 @@
         Arrays.asList(current, vcsOI)) : current;
   }
 
-  private StructObjectInspector createRowInspector(StructObjectInspector current, String[] partKeys)
+  private StructObjectInspector createRowInspector(StructObjectInspector current, String[] partKeys, String[] partKeyTypes)
       throws SerDeException {
     List<String> partNames = new ArrayList<String>();
     List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>();
-    for (String key : partKeys) {
-      partNames.add(key);
-      partObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+    for (int i = 0; i < partKeys.length; i++) {
+      String key = partKeys[i];
+      partNames.add(key);
+      ObjectInspector oi =
+          PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector
+          (TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
+      partObjectInspectors.add(oi);
     }
     StructObjectInspector partObjectInspector = ObjectInspectorFactory
         .getStandardStructObjectInspector(partNames, partObjectInspectors);
@@ -292,10 +305,22 @@
         Arrays.asList(current, partObjectInspector));
   }
 
-  private List<String> createPartValue(String[] partKeys, Map<String, String> partSpec) {
-    List<String> partValues = new ArrayList<String>();
-    for (String key : partKeys) {
-      partValues.add(partSpec.get(key));
+  private Object[] createPartValue(String[] partKeys, Map<String, String> partSpec, String[] partKeyTypes) {
+    Object[] partValues = new Object[partKeys.length];
+    for (int i = 0; i < partKeys.length; i++) {
+      String key = partKeys[i];
+      ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector
+          (TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
+      // Partitions do not exist for this table
+      if (partSpec == null) {
+        // for partitionless table, initialize partValue to null
+        partValues[i] = null;
+      } else {
+        partValues[i] =
+            ObjectInspectorConverters.
+                getConverter(PrimitiveObjectInspectorFactory.
+                    javaStringObjectInspector, oi).convert(partSpec.get(key));
+      }
     }
     return partValues;
   }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java	(revision 1580963)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java	(working copy)
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.ql.exec.vector;
 
 import java.io.IOException;
+import java.sql.Date;
+import java.sql.Timestamp;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.LinkedHashMap;
@@ -28,6 +30,8 @@
 import java.util.regex.Pattern;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.common.type.Decimal128;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
@@ -38,12 +42,17 @@
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.FileSplit;
 
@@ -54,7 +63,7 @@
  * with the partition column.
  */
 public class VectorizedRowBatchCtx {
-  
+
   // OI for raw row data (EG without partition cols)
   private StructObjectInspector rawRowOI;
 
@@ -65,8 +74,11 @@
   private Deserializer deserializer;
 
   // Hash map of partition values. Key=TblColName value=PartitionValue
-  private Map<String, String> partitionValues;
-  
+  private Map<String, Object> partitionValues;
+
+  //partition types
+  private Map<String, PrimitiveCategory> partitionTypes;
+
   // Column projection list - List of column indexes to include. This
   // list does not contain partition columns
   private List<Integer> colsToInclude;
@@ -86,11 +98,13 @@
    *          Hash map of partition values. Key=TblColName value=PartitionValue
    */
   public VectorizedRowBatchCtx(StructObjectInspector rawRowOI, StructObjectInspector rowOI,
-      Deserializer deserializer, Map<String, String> partitionValues) {
+      Deserializer deserializer, Map<String, Object> partitionValues,
+      Map<String, PrimitiveCategory> partitionTypes) {
     this.rowOI = rowOI;
     this.rawRowOI = rawRowOI;
     this.deserializer = deserializer;
     this.partitionValues = partitionValues;
+    this.partitionTypes = partitionTypes;
   }
 
@@ -173,25 +187,44 @@
       // raw row object inspector (row with out partition col)
       LinkedHashMap<String, String> partSpec = part.getPartSpec();
       String[] partKeys = pcols.trim().split("/");
+      String pcolTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
+      String[] partKeyTypes = pcolTypes.trim().split(":");
+
+      if (partKeys.length != partKeyTypes.length) {
+        throw new HiveException("Internal error : partKeys length, " +partKeys.length +
+            " not the same as partKeyTypes length, " + partKeyTypes.length);
+      }
+
       List<String> partNames = new ArrayList<String>(partKeys.length);
-      partitionValues = new LinkedHashMap<String, String>();
-      List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>(
-          partKeys.length);
+      List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>(partKeys.length);
+      partitionValues = new LinkedHashMap<String, Object>();
+      Map<PrimitiveCategory, Converter> categoryToConverter = new LinkedHashMap<PrimitiveCategory, Converter>();
       for (int i = 0; i < partKeys.length; i++) {
         String key = partKeys[i];
-        partNames.add(key);
+        partNames.add(i, key);
+        ObjectInspector objectInspector = null;
+        Object objectVal;
+        partitionTypes.put(key, PrimitiveCategory.STRING);
         if (partSpec == null) {
           // for partitionless table, initialize partValue to empty string.
           // We can have partitionless table even if we have partition keys
           // when there is only only partition selected and the partition key is not
           // part of the projection/include list.
-          partitionValues.put(key, "");
+          objectVal = "";
+          objectInspector = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
         } else {
-          partitionValues.put(key, partSpec.get(key));
+          // Create a Standard java object Inspector
+          objectInspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
+              TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]));
+          objectVal =
+              ObjectInspectorConverters.
                  getConverter(PrimitiveObjectInspectorFactory.
+                      javaStringObjectInspector, objectInspector).
+                  convert(partSpec.get(key));
+          partitionTypes.put(key, TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]).getPrimitiveCategory());
         }
-
-        partObjectInspectors
-            .add(PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+        partitionValues.put(key, objectVal);
+        partObjectInspectors.add(i, objectInspector);
       }
 
       // Create partition OI
@@ -213,7 +246,7 @@
       colsToInclude = ColumnProjectionUtils.getReadColumnIDs(hiveConf);
     }
-  
+
   /**
   * Creates a Vectorized row batch and the column vectors.
   *
@@ -274,8 +307,7 @@
               + foi.getCategory());
       default:
         throw new HiveException("Unknown ObjectInspector category!");
-
-      }
+      }
       }
     }
     result.numCols = fieldRefs.size();
@@ -334,7 +366,7 @@
     }
     throw new HiveException("Not able to find column name in row object inspector");
   }
-  
+
   /**
    * Add the partition values to the batch
   *
@@ -344,17 +376,165 @@
   public void addPartitionColsToBatch(VectorizedRowBatch batch) throws HiveException {
     int colIndex;
-    String value;
-    BytesColumnVector bcv;
+    Object value;
+    PrimitiveCategory pCategory;
     if (partitionValues != null) {
       for (String key : partitionValues.keySet()) {
         colIndex = getColIndexBasedOnColName(key);
         value = partitionValues.get(key);
-        bcv = (BytesColumnVector) batch.cols[colIndex];
-        bcv.setRef(0, value.getBytes(), 0, value.length());
-        bcv.isRepeating = true;
-        bcv.isNull[0] = false;
-        bcv.noNulls = true;
+        pCategory = partitionTypes.get(key);
+
+        switch (pCategory) {
+        case BOOLEAN: {
+          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          if (value == null) {
+            lcv.noNulls = false;
+            lcv.isNull[0] = true;
+            lcv.isRepeating = true;
+          } else {
+            lcv.fill((Boolean)value == true ? 1 : 0);
+            lcv.isNull[0] = false;
+          }
+        }
+        break;
+
+        case BYTE: {
+          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          if (value == null) {
+            lcv.noNulls = false;
+            lcv.isNull[0] = true;
+            lcv.isRepeating = true;
+          } else {
+            lcv.fill((Byte)value);
+            lcv.isNull[0] = false;
+          }
+        }
+        break;
+
+        case SHORT: {
+          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          if (value == null) {
+            lcv.noNulls = false;
+            lcv.isNull[0] = true;
+            lcv.isRepeating = true;
+          } else {
+            lcv.fill((Short)value);
+            lcv.isNull[0] = false;
+          }
+        }
+        break;
+
+        case INT: {
+          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          if (value == null) {
+            lcv.noNulls = false;
+            lcv.isNull[0] = true;
+            lcv.isRepeating = true;
+          } else {
+            lcv.fill((Integer)value);
+            lcv.isNull[0] = false;
+          }
+        }
+        break;
+
+        case LONG: {
+          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          if (value == null) {
+            lcv.noNulls = false;
+            lcv.isNull[0] = true;
+            lcv.isRepeating = true;
+          } else {
+            lcv.fill((Long)value);
+            lcv.isNull[0] = false;
+          }
+        }
+        break;
+
+        case DATE: {
+          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          if (value == null) {
+            lcv.noNulls = false;
+            lcv.isNull[0] = true;
+            lcv.isRepeating = true;
+          } else {
+            lcv.fill(((Date)value).getTime());
+            lcv.isNull[0] = false;
+          }
+        }
+        break;
+
+        case TIMESTAMP: {
+          LongColumnVector lcv = (LongColumnVector) batch.cols[colIndex];
+          if (value == null) {
+            lcv.noNulls = false;
+            lcv.isNull[0] = true;
+            lcv.isRepeating = true;
+          } else {
+            lcv.fill((long)(((Timestamp) value).getTime()));
+            lcv.isNull[0] = false;
+          }
+        }
+        break;
+
+        case FLOAT: {
+          DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[colIndex];
+          if (value == null) {
+            dcv.noNulls = false;
+            dcv.isNull[0] = true;
+            dcv.isRepeating = true;
+          } else {
+            dcv.fill((Float) value);
+            dcv.isNull[0] = false;
+          }
+        }
+        break;
+
+        case DOUBLE: {
+          DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[colIndex];
+          if (value == null) {
+            dcv.noNulls = false;
+            dcv.isNull[0] = true;
+            dcv.isRepeating = true;
+          } else {
+            dcv.fill((Double) value);
+            dcv.isNull[0] = false;
+          }
+        }
+        break;
+
+        case DECIMAL: {
+          DecimalColumnVector dv = (DecimalColumnVector) batch.cols[colIndex];
+          if (value == null) {
+            dv.noNulls = false;
+            dv.isNull[0] = true;
+            dv.isRepeating = true;
+          } else {
+            HiveDecimal hd =
+                (HiveDecimal)(value);
+            dv.vector[0] = new Decimal128(hd.toString(), (short)hd.scale());
+            dv.isRepeating = true;
+            dv.isNull[0] = false;
+          }
+        }
+        break;
+
+        case STRING: {
+          BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex];
+          String sVal = (String)value;
+          if (sVal == null) {
+            bcv.noNulls = false;
+            bcv.isNull[0] = true;
+            bcv.isRepeating = true;
+          } else {
+            bcv.fill(sVal.getBytes());
+            bcv.isNull[0] = false;
+          }
+        }
+        break;
+
+        default:
+          throw new HiveException("Unable to recognize the partition type " + pCategory +
+              " for column " + key);
+        }
       }
     }
   }
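
Note (illustrative, not part of the patch): the recurring idiom in the hunks above converts the string-valued entry from the partition spec into an object of the declared partition column type via ObjectInspectorConverters, instead of always wrapping it in a Text. A minimal standalone sketch of that conversion, assuming the serde2 APIs the patch itself calls (TypeInfoFactory, PrimitiveObjectInspectorFactory, ObjectInspectorConverters) are on the classpath:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PartitionValueConversionExample {
  public static void main(String[] args) {
    String partKeyType = "int";     // taken from partition_columns.types
    String partSpecValue = "2014";  // partition spec values are always strings

    // Writable object inspector for the declared partition column type.
    ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
        TypeInfoFactory.getPrimitiveTypeInfo(partKeyType));

    // Convert the string value to the matching writable (an IntWritable here).
    Object typedValue = ObjectInspectorConverters.getConverter(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi).convert(partSpecValue);

    System.out.println(typedValue.getClass().getSimpleName() + " = " + typedValue);
  }
}

Because the partition value is now typed, addPartitionColsToBatch can fill the numeric, date/timestamp, decimal and string column vectors per PrimitiveCategory, as in the switch above, rather than treating every partition column as bytes.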