Index: src/java/org/apache/hcatalog/mapreduce/InputJobInfo.java
===================================================================
--- src/java/org/apache/hcatalog/mapreduce/InputJobInfo.java	(revision 1396630)
+++ src/java/org/apache/hcatalog/mapreduce/InputJobInfo.java	(working copy)
@@ -19,11 +19,28 @@
 
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
 import java.io.Serializable;
 import java.util.List;
 import java.util.Properties;
+import java.util.zip.Deflater;
+import java.util.zip.DeflaterOutputStream;
+import java.util.zip.InflaterInputStream;
 
-/** The class used to serialize and store the information read from the metadata server */
+/**
+ * Container for metadata read from the metadata server. Users should specify input to
+ * their HCatalog MR jobs as follows:
+ * <pre>
+ * HCatInputFormat.setInput(job, InputJobInfo.create(databaseName, tableName, filter));
+ * </pre>
+ * Note: while InputJobInfo is public,
+ * <a href="https://issues.apache.org/jira/browse/HCATALOG-527">HCATALOG-527</a> discusses
+ * removing this class from the public API, by simplifying {@link HCatInputFormat#setInput}
+ * to simply take the input specification arguments directly. Use InputJobInfo outside the
+ * above context (including serialization) at your own peril!
+ */
 public class InputJobInfo implements Serializable {
 
   /** The serialization version */
@@ -40,7 +57,7 @@
   private String filter;
 
   /** The list of partitions matching the filter. */
-  private List partitions;
+  transient private List partitions;
 
   /** implementation specific job properties */
   private Properties properties;
@@ -134,4 +151,33 @@
   public Properties getProperties() {
     return properties;
   }
+
+  /**
+   * Serialize this object, compressing the partitions which can exceed the
+   * allowed jobConf size.
+   * @see <a href="https://issues.apache.org/jira/browse/HCATALOG-453">HCATALOG-453</a>
+   */
+  private void writeObject(ObjectOutputStream oos)
+    throws IOException {
+    oos.defaultWriteObject();
+    Deflater def = new Deflater(Deflater.BEST_COMPRESSION);
+    ObjectOutputStream partInfoWriter =
+      new ObjectOutputStream(new DeflaterOutputStream(oos, def));
+    partInfoWriter.writeObject(partitions);
+    partInfoWriter.close();
+  }
+
+  /**
+   * Deserialize this object, decompressing the partitions which can exceed the
+   * allowed jobConf size.
+   * @see <a href="https://issues.apache.org/jira/browse/HCATALOG-453">HCATALOG-453</a>
+   */
+  @SuppressWarnings("unchecked")
+  private void readObject(ObjectInputStream ois)
+    throws IOException, ClassNotFoundException {
+    ois.defaultReadObject();
+    ObjectInputStream partInfoReader =
+      new ObjectInputStream(new InflaterInputStream(ois));
+    partitions = (List)partInfoReader.readObject();
+  }
 }