diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java index 23068f8..5f8edeb 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java @@ -1000,8 +1000,38 @@ public static Properties getPartSchemaFromTableSchema( return schema; } - public static Properties getSchema( - org.apache.hadoop.hive.metastore.api.StorageDescriptor sd, + /** Writes the column properties for cols into schema: names comma-separated under META_TABLE_COLUMNS, types colon-separated under META_TABLE_COLUMN_TYPES, and comments NUL-separated under the columns.comments key (a null comment is written as an empty string). @param schema properties object updated in place @param cols columns to describe, in output order @return the same schema object that was passed in */ private static Properties addCols(Properties schema, List cols) { + + StringBuilder colNameBuf = new StringBuilder(); + StringBuilder colTypeBuf = new StringBuilder(); + StringBuilder colComment = new StringBuilder(); + + boolean first = true; + for (FieldSchema col : cols) { + /* separators go before every entry except the first, so the lists have no trailing delimiter */ if (!first) { + colNameBuf.append(","); + colTypeBuf.append(":"); + colComment.append('\0'); + } + colNameBuf.append(col.getName()); + colTypeBuf.append(col.getType()); + colComment.append((null != col.getComment()) ? 
col.getComment() : ""); + first = false; + } + schema.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS, + colNameBuf.toString()); + String colTypes = colTypeBuf.toString(); + schema.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES, + colTypes); + schema.setProperty("columns.comments", colComment.toString()); + + return schema; + + } + + /** Former body of getSchema with the column-name, column-type and column-comment properties no longer set here (that code is removed by the next hunk); callers that need them add them separately, as getSchema does through addCols. NOTE(review): most of this body lies outside the hunks of this patch, so the remaining properties it sets are not documented here. */ public static Properties getSchemaWithoutCols(org.apache.hadoop.hive.metastore.api.StorageDescriptor sd, org.apache.hadoop.hive.metastore.api.StorageDescriptor tblsd, Map parameters, String databaseName, String tableName, List partitionKeys) { @@ -1051,30 +1081,7 @@ public static Properties getSchema( .getSerdeInfo().getSerializationLib()); } } - StringBuilder colNameBuf = new StringBuilder(); - StringBuilder colTypeBuf = new StringBuilder(); - StringBuilder colComment = new StringBuilder(); - boolean first = true; - for (FieldSchema col : tblsd.getCols()) { - if (!first) { - colNameBuf.append(","); - colTypeBuf.append(":"); - colComment.append('\0'); - } - colNameBuf.append(col.getName()); - colTypeBuf.append(col.getType()); - colComment.append((null != col.getComment()) ? 
col.getComment() : ""); - first = false; - } - String colNames = colNameBuf.toString(); - String colTypes = colTypeBuf.toString(); - schema.setProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS, - colNames); - schema.setProperty( - org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES, - colTypes); - schema.setProperty("columns.comments", colComment.toString()); + if (sd.getCols() != null) { schema.setProperty( org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_DDL, @@ -1118,6 +1125,15 @@ public static Properties getSchema( return schema; } + /** Builds the schema properties with getSchemaWithoutCols and then adds the column properties for tblsd.getCols() through addCols; the parameter list is the one getSchema had before this patch. @return the populated properties object */ public static Properties getSchema( + org.apache.hadoop.hive.metastore.api.StorageDescriptor sd, + org.apache.hadoop.hive.metastore.api.StorageDescriptor tblsd, + Map parameters, String databaseName, String tableName, + List partitionKeys) { + + return addCols(getSchemaWithoutCols(sd, tblsd, parameters, databaseName, tableName, partitionKeys), tblsd.getCols()); + } + /** * Convert FieldSchemas to columnNames. 
*/ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java index 99724c1..b4dd582 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapOperator.java @@ -21,7 +21,6 @@ import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashMap; @@ -31,16 +30,13 @@ import java.util.Properties; import java.util.Set; import java.util.TreeMap; -import java.util.concurrent.Future; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; -import org.apache.hadoop.hive.ql.exec.MapOperator.MapOpCtx; import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext; import org.apache.hadoop.hive.ql.io.RecordIdentifier; -import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.VirtualColumn; import org.apache.hadoop.hive.ql.plan.MapWork; @@ -57,14 +53,12 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapred.InputFormat; import 
org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.StringUtils; diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index dacb80f..57bfe13 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -1355,6 +1355,39 @@ public static TableDesc getTableDesc(Table tbl) { .getOutputFormatClass(), props)); } + /** Builds a TableDesc for tbl whose schema properties describe only the columns named in neededCols: starts from MetaStoreUtils.getSchemaWithoutCols, sets SERIALIZATION_LIB to the class name of the table deserializer, then adds the column properties through addReferencedCols. @param tbl table to describe @param neededCols names of the columns to keep @return descriptor using the input and output format classes of tbl */ public static TableDesc getTableDesc(Table tbl, List neededCols) { + Properties props = MetaStoreUtils.getSchemaWithoutCols(tbl.getSd(), tbl.getSd(), + tbl.getParameters(), tbl.getDbName(), tbl.getTableName(), tbl.getPartitionKeys()); + props.put(serdeConstants.SERIALIZATION_LIB, tbl.getDeserializer().getClass().getName()); + addReferencedCols(props, neededCols, tbl.getCols()); + return (new TableDesc(tbl.getInputFormatClass(), tbl + .getOutputFormatClass(), props)); + } + + /** Sets the META_TABLE_COLUMNS, META_TABLE_COLUMN_TYPES and columns.comments properties on props from those entries of allCols whose name is contained in neededCols, keeping the order of allCols. Names are comma-separated, types colon-separated and comments NUL-separated; a null comment is written as an empty string. @param props properties object updated in place @param neededCols names of the columns to keep @param allCols full column list to filter */ public static void addReferencedCols(Properties props, List neededCols, List allCols) { + StringBuilder referencedCols = new StringBuilder(); + StringBuilder colTypes = new StringBuilder(); + StringBuilder comments = new StringBuilder(); + boolean first = true; + for (FieldSchema fs : allCols) { + if (neededCols.contains(fs.getName())){ + if (!first) { + referencedCols.append(","); + colTypes.append(":"); + comments.append('\0'); + } else { + first = false; + } + referencedCols.append(fs.getName()); + colTypes.append(fs.getType()); + comments.append((null != fs.getComment()) ? fs.getComment() : ""); /* a missing comment must serialize as an empty string, as MetaStoreUtils.addCols does; appending the null reference would emit the four characters null */ + } + } + props.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS, referencedCols.toString()); + props.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES, colTypes.toString()); + props.setProperty("columns.comments", comments.toString()); + } + // column names and column types are all delimited by comma public static TableDesc getTableDesc(String cols, String colTypes) { return 
(new TableDesc(SequenceFileInputFormat.class, @@ -1365,8 +1398,8 @@ public static TableDesc getTableDesc(String cols, String colTypes) { serdeConstants.SERIALIZATION_LIB,LazySimpleSerDe.class.getName()))); } - public static PartitionDesc getPartitionDesc(Partition part) throws HiveException { - return (new PartitionDesc(part)); + /** Wraps part in a new PartitionDesc, passing colNames through to the two-argument PartitionDesc constructor. This replaces the former one-argument getPartitionDesc(Partition), so existing callers must now supply the column names. */ public static PartitionDesc getPartitionDesc(Partition part, List colNames) throws HiveException { + return (new PartitionDesc(part, colNames)); } public static PartitionDesc getPartitionDescFromTableDesc(TableDesc tblDesc, Partition part, diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index 0cd7b62..70e45d0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -115,6 +115,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.mapred.InputFormat; +import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.Interner; @@ -511,7 +512,7 @@ public static void setMapWork(MapWork plan, ParseContext parseCtx, Set neededCols) throws HiveException { + /* NOTE(review): the declaration that opens this block is not intact in this patch text (the text between Set and neededCols appears to be missing, including the rest of the GenMapRedUtils hunk and the next file header); presumably this is the body of a PartitionDesc(Partition, List) constructor - confirm against the original commit. The visible statements initialise through PartitionDescConstructorHelper with a TableDesc restricted to neededCols, then build the partition schema without column properties, add only the referenced columns of part.getCols(), and store the result with setProperties. */ Table tbl = part.getTable(); + PartitionDescConstructorHelper(part, Utilities.getTableDesc(part.getTable(), neededCols), true); + Properties schema = MetaStoreUtils.getSchemaWithoutCols(part.getTPartition().getSd(), part.getTPartition().getSd(), + part.getParameters(), tbl.getDbName(), tbl.getTableName(), tbl.getPartitionKeys()); + Utilities.addReferencedCols(schema, neededCols, part.getCols()); + setProperties(schema); + } + + /** * @param part Partition * @param tblDesc Table Descriptor * @param usePartSchemaProperties Use Partition Schema Properties to set the @@ -189,7 +202,7 @@ public void setOutputFileFormatClass(final Class outputFileFormatClass) { Class outputClass = outputFileFormatClass == null ? 
null : HiveFileFormatUtils.getOutputFormatSubstitute(outputFileFormatClass); if (outputClass != null) { - this.outputFileFormatClass = (Class) + this.outputFileFormatClass = (Class) CLASS_INTERNER.intern(outputClass); } else { this.outputFileFormatClass = outputClass;