Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java (revision 1537305)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java (working copy)
@@ -22,12 +22,10 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Properties;
-import java.util.Set;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
@@ -149,7 +147,12 @@
     }
 
     private Object readRow(Writable value) throws SerDeException {
-      return partTblObjectInspectorConverter.convert(deserializer.deserialize(value));
+      if (partTblObjectInspectorConverter == null) {
+        return deserializer.deserialize(value);
+      }
+      else {
+        return partTblObjectInspectorConverter.convert(deserializer.deserialize(value));
+      }
     }
 
     public StructObjectInspector getRowObjectInspector() {
@@ -200,8 +203,15 @@
 
     opCtx.tblRawRowObjectInspector = convertedOI.get(td);
 
-    opCtx.partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(
-        partRawRowObjectInspector, opCtx.tblRawRowObjectInspector);
+    // If we can bypass the ObjectInspectorConverters function calls, set
+    // partTblObjectInspectorConverter to null.
+    if (canSkipOIConversion(pd)) {
+      opCtx.partTblObjectInspectorConverter = null;
+    }
+    else {
+      opCtx.partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(
+          partRawRowObjectInspector, opCtx.tblRawRowObjectInspector);
+    }
 
     // Next check if this table has partitions and if so
     // get the list of partition names as well as allocate
@@ -263,61 +273,113 @@
     return opCtx;
   }
 
-  // Return the mapping for table descriptor to the expected table OI
   /**
-   * Traverse all the partitions for a table, and get the OI for the table.
-   * Note that a conversion is required if any of the partition OI is different
-   * from the table OI. For eg. if the query references table T (partitions P1, P2),
-   * and P1's schema is same as T, whereas P2's scheme is different from T, conversion
-   * might be needed for both P1 and P2, since SettableOI might be needed for T
+   * This function is used to check if ObjectInspectorConverters.*() invocations can be skipped
+   * within getConvertedOI() and initObjectInspector().
+   * @param pd partition descriptor for the current partition
+   * @return true if ObjectInspectorConverters need not be used to process the given partition;
+   *         false otherwise.
+   */
+  private boolean canSkipOIConversion(PartitionDesc pd) throws HiveException {
+    if (pd == null) {
+      return false;
+    }
+    boolean canSkipOIConv = false;
+    String reasonCode = "Unknown";
+
+    try {
+      do {
+        if (!isPartitioned(pd)) {
+          canSkipOIConv = true;
+          reasonCode = "Table not partitioned";
+          break;
+        }
+
+        TableDesc td = pd.getTableDesc();
+        String tableSerDeClassName = td == null ?
+            null : td.getSerdeClassName();
+        String partSerDeClassName = pd.getSerdeClassName();
+
+        if (tableSerDeClassName != null && partSerDeClassName != null &&
+            tableSerDeClassName.equals(partSerDeClassName)) {
+          canSkipOIConv = true;
+          reasonCode = "tableSerDe and partSerDe have same class, " +
+              tableSerDeClassName;
+          break;
+        }
+      } while (false);
+    }
+    catch (Exception e) {
+      throw new HiveException(e);
+    }
+
+    if (LOG.isDebugEnabled() && canSkipOIConv) {
+      LOG.debug("canSkipOIConversion returns true, Reason: " +
+          reasonCode);
+    }
+    return canSkipOIConv;
+  }
+
+  /**
+   * Return the mapping from table descriptor to the expected table OI.
+   * Traverse all the partitions and get the OI for the table
+   * associated with each one. Note that a conversion is required if any of the
+   * partition OIs is different from the table OI. For example, if the query references
+   * table T (partitions P1, P2), and P1's schema is the same as T, whereas P2's schema
+   * is different from T, conversion might be needed for both P1 and P2, since
+   * a SettableOI might be needed for T.
    */
   private Map<TableDesc, StructObjectInspector> getConvertedOI(Configuration hconf) throws HiveException {
-    Map<TableDesc, StructObjectInspector> tableDescOI =
+    // The [TableDescriptor:ObjectInspector] map returned at the end of this function.
+    Map<TableDesc, StructObjectInspector> tableDescToOI =
         new HashMap<TableDesc, StructObjectInspector>();
-    Set<TableDesc> identityConverterTableDesc = new HashSet<TableDesc>();
+
     try {
+      // The settable properties map used for ObjectInspectorConverters.getConvertedOI().
      Map<ObjectInspector, Boolean> oiSettableProperties = new HashMap<ObjectInspector, Boolean>();
 
-      for (String onefile : conf.getPathToAliases().keySet()) {
-        PartitionDesc pd = conf.getPathToPartitionInfo().get(onefile);
-        TableDesc tableDesc = pd.getTableDesc();
+      // Iterate over all the partitions.
+      for (PartitionDesc partDesc : conf.getPartitionDescs()) {
+        TableDesc tableDesc = partDesc.getTableDesc();
         Properties tblProps = tableDesc.getProperties();
-        // If the partition does not exist, use table properties
-        Properties partProps = isPartitioned(pd) ?
-            pd.getOverlayedProperties() : tblProps;
-        Class sdclass = hconf.getClassByName(pd.getSerdeClassName());
-        Deserializer partDeserializer = (Deserializer) sdclass.newInstance();
-        partDeserializer.initialize(hconf, partProps);
-        StructObjectInspector partRawRowObjectInspector = (StructObjectInspector) partDeserializer
-            .getObjectInspector();
+        StructObjectInspector tblRawRowObjectInspector = null;
 
-        StructObjectInspector tblRawRowObjectInspector = tableDescOI.get(tableDesc);
-        if ((tblRawRowObjectInspector == null) ||
-            (identityConverterTableDesc.contains(tableDesc))) {
-          sdclass = hconf.getClassByName(tableDesc.getSerdeClassName());
-          Deserializer tblDeserializer = (Deserializer) sdclass.newInstance();
+        // If tableDescToOI does not contain an entry for the current partition's table,
+        // get the value of tblRawRowObjectInspector using the tblDeserializer.
+        if (!tableDescToOI.containsKey(tableDesc)) {
+          Deserializer tblDeserializer = (Deserializer)
+              (hconf.getClassByName(tableDesc.getSerdeClassName()).newInstance());
           tblDeserializer.initialize(hconf, tblProps);
-          tblRawRowObjectInspector =
-              (StructObjectInspector) ObjectInspectorConverters.getConvertedOI(
-                  partRawRowObjectInspector,
-                  tblDeserializer.getObjectInspector(), oiSettableProperties);
-
-          if (identityConverterTableDesc.contains(tableDesc)) {
-            if (!partRawRowObjectInspector.equals(tblRawRowObjectInspector)) {
-              identityConverterTableDesc.remove(tableDesc);
-            }
-          }
-          else if (partRawRowObjectInspector.equals(tblRawRowObjectInspector)) {
-            identityConverterTableDesc.add(tableDesc);
-          }
-
-          tableDescOI.put(tableDesc, tblRawRowObjectInspector);
+          tblRawRowObjectInspector = (StructObjectInspector) tblDeserializer.getObjectInspector();
         }
+        // If tableDescToOI contains an entry for the current partition's table, get the
+        // value of tblRawRowObjectInspector directly from tableDescToOI.
+        else {
+          tblRawRowObjectInspector = tableDescToOI.get(tableDesc);
+        }
+        // Integrate the current partition's OI with tableDescToOI.
+        // See if we can skip the ObjectInspectorConverters function calls.
+        if (canSkipOIConversion(partDesc)) {
+          tableDescToOI.put(tableDesc, tblRawRowObjectInspector);
+        }
+        else {
+          Deserializer partDeserializer = (Deserializer)
+              (hconf.getClassByName(partDesc.getSerdeClassName()).newInstance());
+          Properties partProps = isPartitioned(partDesc) ?
+              partDesc.getOverlayedProperties() : tblProps;
+          partDeserializer.initialize(hconf, partProps);
+          // We need to get the object inspector associated with the current table.
+          tableDescToOI.put(tableDesc,
+              (StructObjectInspector) (ObjectInspectorConverters.getConvertedOI(
                  partDeserializer.getObjectInspector(),
+                  tblRawRowObjectInspector, oiSettableProperties)));
+        }
       }
-    } catch (Exception e) {
+    }
+    catch (Exception e) {
       throw new HiveException(e);
     }
-    return tableDescOI;
+    return tableDescToOI;
   }
 
   private boolean isPartitioned(PartitionDesc pd) {
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (revision 1537305)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (working copy)
@@ -358,6 +358,40 @@
    */
   private static Map<ObjectInspector, Boolean> oiSettableProperties = new HashMap<ObjectInspector, Boolean>();
 
+  /** This function is used to check if ObjectInspectorConverters.*() invocations can be skipped
+   * within functions such as getRecordReader() and getOutputObjectInspector().
+   * @param tableSerDe table level SerDe
+   * @param partSerDe partition level SerDe
+   * @return true if ObjectInspectorConverters need not be used to process the given partition;
+   *         false otherwise.
+   */
+  private boolean canSkipOIConversion(
+      Deserializer tableSerDe, Deserializer partSerDe) {
+    boolean canSkipOIConv = false;
+    String reasonCode = "Unknown";
+    do {
+      if (work.isNotPartitioned()) {
+        canSkipOIConv = true;
+        reasonCode = "Work not partitioned";
+        break;
+      }
+      if (tableSerDe != null && partSerDe != null && tableSerDe.getClass() != null &&
+          partSerDe.getClass() != null &&
+          tableSerDe.getClass().getName().equals(partSerDe.getClass().getName())) {
+        canSkipOIConv = true;
+        reasonCode = "tableSerDe and partSerDe have same class, " + tableSerDe.getClass().getName();
+        break;
+      }
+
+    } while (false);
+
+    if (LOG.isDebugEnabled() && canSkipOIConv) {
+      LOG.debug("canSkipOIConversion returns true, Reason: " +
+          reasonCode);
+    }
+    return canSkipOIConv;
+  }
+
   private RecordReader getRecordReader() throws Exception {
     if (currPath == null) {
       getNextPath();
@@ -411,14 +445,23 @@
       tblSerde.initialize(job, currPart.getTableDesc().getProperties());
     }
 
-    ObjectInspector outputOI = ObjectInspectorConverters.getConvertedOI(
-        serde.getObjectInspector(),
-        partitionedTableOI == null ? tblSerde.getObjectInspector() : partitionedTableOI,
-        oiSettableProperties);
+    ObjectInspector outputOI;
 
-    partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(
-        serde.getObjectInspector(), outputOI);
+    // Skip all the ObjectInspectorConverters function calls if possible.
+    if (canSkipOIConversion(serde, tblSerde)) {
+      partTblObjectInspectorConverter = null;
+      outputOI = serde.getObjectInspector();
+    }
+    else {
+      outputOI = ObjectInspectorConverters.getConvertedOI(
+          serde.getObjectInspector(),
+          partitionedTableOI == null ? tblSerde.getObjectInspector() : partitionedTableOI,
+          oiSettableProperties);
+      partTblObjectInspectorConverter = ObjectInspectorConverters.getConverter(
+          serde.getObjectInspector(), outputOI);
+    }
+
 
     if (LOG.isDebugEnabled()) {
       LOG.debug("Creating fetchTask with deserializer typeinfo: " +
          serde.getObjectInspector().getTypeName());
@@ -544,7 +587,8 @@
       vcValues = MapOperator.populateVirtualColumnValues(context, vcCols, vcValues, serde);
       row[isPartitioned ? 2 : 1] = vcValues;
     }
-    row[0] = partTblObjectInspectorConverter.convert(serde.deserialize(value));
+    row[0] = partTblObjectInspectorConverter == null ?
+        serde.deserialize(value) : partTblObjectInspectorConverter.convert(serde.deserialize(value));
 
     if (hasVC || isPartitioned) {
       inspectable.o = row;
@@ -638,12 +682,17 @@
         partition = listPart;
         Deserializer partSerde = listPart.getDeserializer();
         partSerde.initialize(job, listPart.getOverlayedProperties());
-
-        partitionedTableOI = ObjectInspectorConverters.getConvertedOI(
-            partSerde.getObjectInspector(), tableOI, oiSettableProperties);
-        if (!partitionedTableOI.equals(tableOI)) {
-          break;
+        // Skip the getConvertedOI calls if possible.
+        if (canSkipOIConversion(tblSerde, partSerde)) {
+          partitionedTableOI = tableOI;
         }
+        else {
+          partitionedTableOI = ObjectInspectorConverters.getConvertedOI(
+              partSerde.getObjectInspector(), tableOI, oiSettableProperties);
+          if (!partitionedTableOI.equals(tableOI)) {
+            break;
+          }
+        }
       }
       return getRowInspectorFromPartition(partition, partitionedTableOI);
     } catch (Exception e) {
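
Note (illustration only, not part of the patch): the optimization rests on one cheap check. When the table is not partitioned, or when the table-level and partition-level SerDes are the same class, the patch treats object-inspector conversion as unnecessary: the ObjectInspectorConverters.getConvertedOI()/getConverter() calls are skipped, a null partTblObjectInspectorConverter marks the fast path, and the per-row convert() in MapOperator.readRow() and in the row[0] assignment in FetchOperator is bypassed. The stand-alone sketch below mirrors that decision logic outside Hive; the class name, method signature, and sample SerDe class names are illustrative assumptions, not code from this patch.

// Illustrative sketch only: the same skip decision written as a stand-alone class
// so it can be compiled and exercised in isolation.
public class OIConversionSkipCheck {

  /**
   * Returns true when object-inspector conversion can be bypassed: either the table is
   * not partitioned, or the table-level and partition-level SerDe classes are identical.
   * In the patch the two names come from TableDesc.getSerdeClassName() and
   * PartitionDesc.getSerdeClassName() (MapOperator) or from the deserializer classes
   * themselves (FetchOperator).
   */
  static boolean canSkipOIConversion(boolean partitioned,
      String tableSerDeClassName, String partSerDeClassName) {
    if (!partitioned) {
      // Only the table SerDe is involved, so there is nothing to reconcile.
      return true;
    }
    return tableSerDeClassName != null && partSerDeClassName != null
        && tableSerDeClassName.equals(partSerDeClassName);
  }

  public static void main(String[] args) {
    // Same SerDe on table and partition: the converter (and per-row convert()) is skipped.
    System.out.println(canSkipOIConversion(true,
        "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
        "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"));    // true
    // Partition written with a different SerDe: the converter path is still required.
    System.out.println(canSkipOIConversion(true,
        "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
        "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe"));  // false
  }
}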