Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java	(revision 906820)
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java	(working copy)
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hive.ql.metadata;
 
 import java.io.IOException;
+import java.io.Serializable;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -57,115 +58,67 @@
  * A Hive Table: is a fundamental unit of data in Hive that shares a common
  * schema/DDL
  */
-public class Table {
+public class Table implements Serializable {
 
+  private static final long serialVersionUID = 1L;
+
   static final private Log LOG = LogFactory.getLog("hive.ql.metadata.Table");
 
-  private Properties schema;
-  private Deserializer deserializer;
-  private URI uri;
-  private Class<? extends InputFormat> inputFormatClass;
-  private Class<? extends HiveOutputFormat> outputFormatClass;
+  /** This is the only serialized field
+   */
   private org.apache.hadoop.hive.metastore.api.Table tTable;
 
-  /**
-   * Table (only used internally)
-   *
-   * @throws HiveException
-   *
-   */
-  protected Table() throws HiveException {
+  public Table() {
   }
 
-  /**
-   * Table
-   *
-   * Create a TableMetaInfo object presumably with the intent of saving it to
-   * the metastore
-   *
-   * @param name
-   *          the name of this table in the metadb
-   * @param schema
-   *          an object that represents the schema that this SerDe must know
-   * @param deserializer
-   *          a Class to be used for deserializing the data
-   * @param dataLocation
-   *          where is the table ? (e.g.,
-   *          dfs://hadoop001.sf2p.facebook.com:9000/
-   *          user/facebook/warehouse/example) NOTE: should not be hardcoding
-   *          this, but ok for now
-   *
-   * @exception HiveException
-   *              on internal error. Note not possible now, but in the future
-   *              reserve the right to throw an exception
-   */
-  public Table(String name, Properties schema, Deserializer deserializer,
-      Class<? extends InputFormat<?, ?>> inputFormatClass,
-      Class<?> outputFormatClass, URI dataLocation, Hive hive)
-      throws HiveException {
-    initEmpty();
-    this.schema = schema;
-    this.deserializer = deserializer; // TODO: convert to SerDeInfo format
-    getTTable().getSd().getSerdeInfo().setSerializationLib(
-        deserializer.getClass().getName());
-    getTTable().setTableName(name);
-    getSerdeInfo().setSerializationLib(deserializer.getClass().getName());
-    setInputFormatClass(inputFormatClass);
-    setOutputFormatClass(HiveFileFormatUtils
-        .getOutputFormatSubstitute(outputFormatClass));
-    setDataLocation(dataLocation);
+  public Table(org.apache.hadoop.hive.metastore.api.Table table) {
+    tTable = table;
   }
 
   public Table(String name) {
-    // fill in defaults
-    initEmpty();
-    getTTable().setTableName(name);
-    getTTable().setDbName(MetaStoreUtils.DEFAULT_DATABASE_NAME);
-    // We have to use MetadataTypedColumnsetSerDe because LazySimpleSerDe does
-    // not
-    // support a table with no columns.
-    getSerdeInfo().setSerializationLib(
-        MetadataTypedColumnsetSerDe.class.getName());
-    getSerdeInfo().getParameters().put(Constants.SERIALIZATION_FORMAT, "1");
+    this(getEmptyTable(name));
  }
 
-  void initEmpty() {
-    setTTable(new org.apache.hadoop.hive.metastore.api.Table());
-    getTTable().setSd(new StorageDescriptor());
-    getTTable().setPartitionKeys(new ArrayList<FieldSchema>());
-    getTTable().setParameters(new HashMap<String, String>());
-
-    StorageDescriptor sd = getTTable().getSd();
-    sd.setSerdeInfo(new SerDeInfo());
-    sd.setNumBuckets(-1);
-    sd.setBucketCols(new ArrayList<String>());
-    sd.setCols(new ArrayList<FieldSchema>());
-    sd.setParameters(new HashMap<String, String>());
-    sd.setSortCols(new ArrayList<Order>());
-
-    sd.getSerdeInfo().setParameters(new HashMap<String, String>());
-
-    setTableType(TableType.MANAGED_TABLE);
+  public org.apache.hadoop.hive.metastore.api.Table getTTable() {
+    return tTable;
   }
-
-  public void reinitSerDe() throws HiveException {
-    try {
-      deserializer = MetaStoreUtils.getDeserializer(Hive.get().getConf(),
-          getTTable());
-    } catch (MetaException e) {
-      throw new HiveException(e);
-    }
+
+  /**
+   * This function should only be called by Java serialization.
+   */
+  public void setTTable(org.apache.hadoop.hive.metastore.api.Table tTable) {
+    this.tTable = tTable;
   }
-
-  protected void initSerDe() throws HiveException {
-    if (deserializer == null) {
-      try {
-        deserializer = MetaStoreUtils.getDeserializer(Hive.get().getConf(),
-            getTTable());
-      } catch (MetaException e) {
-        throw new HiveException(e);
-      }
+
+  /**
+   * Initialize an empty table.
+   */
+  static org.apache.hadoop.hive.metastore.api.Table getEmptyTable(String name) {
+    StorageDescriptor sd = new StorageDescriptor();
+    {
+      sd.setSerdeInfo(new SerDeInfo());
+      sd.setNumBuckets(-1);
+      sd.setBucketCols(new ArrayList<String>());
+      sd.setCols(new ArrayList<FieldSchema>());
+      sd.setParameters(new HashMap<String, String>());
+      sd.setSortCols(new ArrayList<Order>());
+      sd.getSerdeInfo().setParameters(new HashMap<String, String>());
+      // We have to use MetadataTypedColumnsetSerDe because LazySimpleSerDe does
+      // not support a table with no columns.
+      sd.getSerdeInfo().setSerializationLib(MetadataTypedColumnsetSerDe.class.getName());
+      sd.getSerdeInfo().getParameters().put(Constants.SERIALIZATION_FORMAT, "1");
     }
+
+    org.apache.hadoop.hive.metastore.api.Table t = new org.apache.hadoop.hive.metastore.api.Table();
+    {
+      t.setSd(sd);
+      t.setPartitionKeys(new ArrayList<FieldSchema>());
+      t.setParameters(new HashMap<String, String>());
+      t.setTableType(TableType.MANAGED_TABLE.toString());
+      t.setTableName(name);
+      t.setDbName(MetaStoreUtils.DEFAULT_DATABASE_NAME);
+    }
+    return t;
   }
 
   public void checkValidity() throws HiveException {
@@ -232,7 +185,6 @@
    * @param inputFormatClass
    */
   public void setInputFormatClass(Class<? extends InputFormat> inputFormatClass) {
-    this.inputFormatClass = inputFormatClass;
     tTable.getSd().setInputFormat(inputFormatClass.getName());
   }
 
@@ -240,12 +192,11 @@
    * @param class1
    */
   public void setOutputFormatClass(Class<?> class1) {
-    outputFormatClass = HiveFileFormatUtils.getOutputFormatSubstitute(class1);
     tTable.getSd().setOutputFormat(class1.getName());
   }
 
   final public Properties getSchema() {
-    return schema;
+    return MetaStoreUtils.getSchema(tTable);
   }
 
   final public Path getPath() {
@@ -257,26 +208,39 @@
   }
 
   final public URI getDataLocation() {
-    return uri;
+    return getPath().toUri();
   }
 
   final public Deserializer getDeserializer() {
-    if (deserializer == null) {
-      try {
-        initSerDe();
-      } catch (HiveException e) {
-        LOG.error("Error in initializing serde.", e);
-      }
+    try {
+      return MetaStoreUtils.getDeserializer(Hive.get().getConf(), getTTable());
+    } catch (MetaException e) {
+      throw new RuntimeException(e);
+    } catch (HiveException e) {
+      throw new RuntimeException(e);
     }
-    return deserializer;
   }
 
   final public Class<? extends InputFormat> getInputFormatClass() {
-    return inputFormatClass;
+    try {
+      return (Class<? extends InputFormat>)
+          Class.forName(tTable.getSd().getInputFormat(), false, Hive.get().getConf().getClassLoader());
+    } catch (ClassNotFoundException e) {
+      throw new RuntimeException(e);
+    } catch (HiveException e) {
+      throw new RuntimeException(e);
+    }
   }
 
   final public Class<? extends HiveOutputFormat> getOutputFormatClass() {
-    return outputFormatClass;
+    try {
+      return (Class<? extends HiveOutputFormat>)
+          Class.forName(tTable.getSd().getOutputFormat(), false, Hive.get().getConf().getClassLoader());
+    } catch (ClassNotFoundException e) {
+      throw new RuntimeException(e);
+    } catch (HiveException e) {
+      throw new RuntimeException(e);
+    }
   }
 
   final public boolean isValidSpec(Map<String, String> spec)
@@ -313,10 +277,6 @@
     getTTable().getParameters().put(name, value);
   }
 
-  /**
-   * getProperty
-   *
-   */
   public String getProperty(String name) {
     return getTTable().getParameters().get(name);
   }
@@ -359,23 +319,7 @@
     }
   }
 
-  /**
-   * @param schema
-   *          the schema to set
-   */
-  public void setSchema(Properties schema) {
-    this.schema = schema;
-  }
-
-  /**
-   * @param deserializer
-   *          the deserializer to set
-   */
-  public void setDeserializer(Deserializer deserializer) {
-    this.deserializer = deserializer;
-  }
-
-  @Override
+  @Override
   public String toString() {
     return getTTable().getTableName();
   }
@@ -413,26 +357,10 @@
     return bcols.get(0);
   }
 
-  /**
-   * @return the tTable
-   */
-  public org.apache.hadoop.hive.metastore.api.Table getTTable() {
-    return tTable;
+  public void setDataLocation(URI uri) {
+    getTTable().getSd().setLocation(uri.toString());
   }
 
-  /**
-   * @param table
-   *          the tTable to set
-   */
-  protected void setTTable(org.apache.hadoop.hive.metastore.api.Table table) {
-    tTable = table;
-  }
-
-  public void setDataLocation(URI uri2) {
-    uri = uri2;
-    getTTable().getSd().setLocation(uri2.toString());
-  }
-
   public void setBucketCols(List<String> bucketCols) throws HiveException {
     if (bucketCols == null) {
       return;
@@ -698,15 +626,6 @@
   }
 
   public Table copy() throws HiveException {
-    Table newTbl = new Table();
-
-    newTbl.schema = schema;
-    newTbl.deserializer = deserializer; // TODO: convert to SerDeInfo format
-
-    newTbl.setTTable(getTTable().clone());
-    newTbl.uri = uri;
-    newTbl.inputFormatClass = inputFormatClass;
-    newTbl.outputFormatClass = outputFormatClass;
-    return newTbl;
+    return new Table(getTTable().clone());
   }
 };
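With Table reduced to the single Thrift-backed tTable field above, bean-style serialization needs no custom logic: tTable is the only writable property left, so an encoder persists getTTable() and restores it through setTTable(), while the schema, format classes, and deserializer are recomputed on demand. A minimal round-trip sketch, not part of the patch; it assumes the Thrift-generated metastore Table behaves as an ordinary Java bean, and the class and table names are illustrative:

import java.beans.XMLDecoder;
import java.beans.XMLEncoder;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;

import org.apache.hadoop.hive.ql.metadata.Table;

public class TableRoundTrip {
  public static void main(String[] args) {
    // Backed entirely by the Thrift metastore object built in getEmptyTable().
    Table t = new Table("example_table");

    // Encode: tTable is the only writable property, so only it is persisted.
    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    XMLEncoder enc = new XMLEncoder(buf);
    enc.writeObject(t);
    enc.close();

    // Decode: the public no-arg constructor plus setTTable() rebuild the Table.
    XMLDecoder dec = new XMLDecoder(new ByteArrayInputStream(buf.toByteArray()));
    Table copy = (Table) dec.readObject();
    dec.close();

    // Derived state is recomputed from tTable rather than copied.
    System.out.println(copy); // toString() prints the table name
  }
}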
Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java	(revision 906820)
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java	(working copy)
@@ -25,8 +25,6 @@
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -65,6 +63,8 @@
   private Class<? extends InputFormat> inputFormatClass;
   private Class<? extends HiveOutputFormat> outputFormatClass;
 
+  private String partName;
+
   /**
    * @return the tPartition
    */
@@ -349,30 +349,7 @@
     }
   }
 
-  /**
-   * mapping from a Path to the bucket number if any
-   */
-  private static Pattern bpattern = Pattern
-      .compile("part-([0-9][0-9][0-9][0-9][0-9])");
-  private String partName;
-  @SuppressWarnings("nls")
-  public static int getBucketNum(Path p) {
-    Matcher m = bpattern.matcher(p.getName());
-    if (m.find()) {
-      String bnum_str = m.group(1);
-      try {
-        return (Integer.parseInt(bnum_str));
-      } catch (NumberFormatException e) {
-        throw new RuntimeException("Unexpected error parsing: " + p.getName()
-            + "," + bnum_str);
-      }
-    }
-    return 0;
-  }
-
-  @SuppressWarnings("nls")
   public Path[] getPath(Sample s) throws HiveException {
     if (s == null) {
       return getPath();
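For reference, the getBucketNum() method deleted above mapped bucket file names such as part-00042 to bucket numbers with a regex. A standalone equivalent of the removed logic; the class name and String-based signature are illustrative, while the pattern and the fall-back to bucket 0 are copied from the deleted lines:

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class BucketNum {
  // Same pattern the removed method used: part-00000 through part-99999.
  private static final Pattern BUCKET_PATTERN =
      Pattern.compile("part-([0-9][0-9][0-9][0-9][0-9])");

  public static int getBucketNum(String fileName) {
    Matcher m = BUCKET_PATTERN.matcher(fileName);
    if (m.find()) {
      // The group is exactly five digits, so parseInt cannot fail here.
      return Integer.parseInt(m.group(1));
    }
    return 0; // the removed code also defaulted to bucket 0
  }

  public static void main(String[] args) {
    System.out.println(getBucketNum("part-00042")); // prints 42
  }
}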
Index: ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java	(revision 906820)
+++ ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java	(working copy)
@@ -37,6 +37,7 @@
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -310,7 +311,6 @@
    */
   public void createTable(Table tbl, boolean ifNotExists) throws HiveException {
     try {
-      tbl.initSerDe();
       if (tbl.getCols().size() == 0) {
         tbl.setFields(MetaStoreUtils.getFieldsFromDeserializer(tbl.getName(),
             tbl.getDeserializer()));
@@ -404,7 +404,8 @@
     if (tableName == null || tableName.equals("")) {
       throw new HiveException("empty table creation??");
     }
-    Table table = new Table();
+
+    // Get the table from metastore
     org.apache.hadoop.hive.metastore.api.Table tTable = null;
     try {
       tTable = getMSC().getTable(dbName, tableName);
@@ -417,18 +418,26 @@
     } catch (Exception e) {
       throw new HiveException("Unable to fetch table " + tableName, e);
     }
-    // just a sanity check
-    assert (tTable != null);
-    try {
-
+
+    if (!TableType.VIRTUAL_VIEW.toString().equals(tTable.getTableType())) {
+      // Fix the non-printable chars
+      Map<String, String> parameters = tTable.getSd().getParameters();
+      String sf = parameters.get(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT);
+      if (sf != null) {
+        char[] b = sf.toCharArray();
+        if ((b.length == 1) && (b[0] < 10)) { // ^A, ^B, ^C, ^D, \t
+          parameters.put(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT,
+              Integer.toString(b[0]));
+        }
+      }
+
       // Use LazySimpleSerDe for MetadataTypedColumnsetSerDe.
       // NOTE: LazySimpleSerDe does not support tables with a single column of
       // col
       // of type "array<string>". This happens when the table is created using
       // an
       // earlier version of Hive.
-      if (
-          org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe.class
+      if (org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe.class
           .getName().equals(
             tTable.getSd().getSerdeInfo().getSerializationLib())
           && tTable.getSd().getColsSize() > 0
@@ -436,47 +445,10 @@
       tTable.getSd().getSerdeInfo().setSerializationLib(
           org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
     }
-
-    // first get a schema (in key / vals)
-    Properties p = MetaStoreUtils.getSchema(tTable);
-    table.setSchema(p);
-    table.setTTable(tTable);
-
-    if (table.isView()) {
-      // Skip the rest, which isn't relevant for a view.
-      table.checkValidity();
-      return table;
-    }
-
-    table
-        .setInputFormatClass((Class<? extends InputFormat<WritableComparable, Writable>>) Class
-            .forName(
-                table
-                    .getSchema()
-                    .getProperty(
-                        org.apache.hadoop.hive.metastore.api.Constants.FILE_INPUT_FORMAT,
-                        org.apache.hadoop.mapred.SequenceFileInputFormat.class
-                            .getName()), true, JavaUtils.getClassLoader()));
-    table.setOutputFormatClass(Class.forName(table.getSchema().getProperty(
-        org.apache.hadoop.hive.metastore.api.Constants.FILE_OUTPUT_FORMAT,
-        HiveSequenceFileOutputFormat.class.getName()), true, JavaUtils
-        .getClassLoader()));
-    table.setDeserializer(MetaStoreUtils.getDeserializer(getConf(), p));
-    table.setDataLocation(new URI(tTable.getSd().getLocation()));
-    } catch (Exception e) {
-      LOG.error(StringUtils.stringifyException(e));
-      throw new HiveException(e);
     }
-    String sf = table
-        .getSerdeParam(org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT);
-    if (sf != null) {
-      char[] b = sf.toCharArray();
-      if ((b.length == 1) && (b[0] < 10)) { // ^A, ^B, ^C, ^D, \t
-        table.setSerdeParam(
-            org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT,
-            Integer.toString(b[0]));
-      }
-    }
+
+    Table table = new Table(tTable);
+    table.checkValidity();
     return table;
   }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java	(revision 906820)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java	(working copy)
@@ -1084,9 +1084,6 @@
         tbl.getTTable().getSd().getSerdeInfo().getParameters().putAll(
             alterTbl.getProps());
       }
-      // since serde is modified then do the appropriate things to reset columns
-      // etc
-      tbl.reinitSerDe();
       tbl.setFields(Hive.getFieldsFromDeserializer(tbl.getName(), tbl
           .getDeserializer()));
     } else if (alterTbl.getOp() == AlterTableDesc.alterTableTypes.ADDFILEFORMAT) {
@@ -1325,9 +1322,7 @@
    */
     if (crtTbl.getSerName() == null) {
       LOG.info("Default to LazySimpleSerDe for table " + crtTbl.getTableName());
-      tbl
-          .setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class
-              .getName());
+      tbl.setSerializationLib(org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.class.getName());
     } else {
       // let's validate that the serde exists
       validateSerDe(crtTbl.getSerName());
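Hive.getTable() now applies the SERIALIZATION_FORMAT cleanup directly to the Thrift object's storage-descriptor parameters before wrapping it in a Table, instead of patching the wrapper afterwards: a raw control character such as ^A is rewritten as its decimal string form. A self-contained sketch of that normalization, not Hive code; "serialization.format" is the value of org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT:

import java.util.HashMap;
import java.util.Map;

public class SerializationFormatFix {
  // Same key the patch reads via Constants.SERIALIZATION_FORMAT.
  static final String SERIALIZATION_FORMAT = "serialization.format";

  static void fixNonPrintableChars(Map<String, String> parameters) {
    String sf = parameters.get(SERIALIZATION_FORMAT);
    if (sf != null) {
      char[] b = sf.toCharArray();
      if ((b.length == 1) && (b[0] < 10)) { // ^A, ^B, ^C, ^D, \t
        parameters.put(SERIALIZATION_FORMAT, Integer.toString(b[0]));
      }
    }
  }

  public static void main(String[] args) {
    Map<String, String> params = new HashMap<String, String>();
    params.put(SERIALIZATION_FORMAT, "\u0001"); // legacy ^A field separator
    fixNonPrintableChars(params);
    System.out.println(params.get(SERIALIZATION_FORMAT)); // prints "1"
  }
}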
Index: ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java	(revision 906820)
+++ ql/src/java/org/apache/hadoop/hive/ql/QueryPlan.java	(working copy)
@@ -64,7 +64,14 @@
   private ArrayList<Task<? extends Serializable>> rootTasks;
   private FetchTask fetchTask;
 
+  private HashSet<ReadEntity> inputs;
+  /**
+   * Note: outputs are not all determined at compile time.
+   * Some of the tasks can change the outputs at run time, because only at run
+   * time do we know what the changes are. These tasks should keep a reference
+   * to the outputs here.
+   */
   private HashSet<WriteEntity> outputs;
 
   private HashMap<String, String> idToTableNameMap;
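The new comment in QueryPlan notes that some tasks only discover their outputs while executing, so they keep a reference to the plan's outputs set and grow it at run time. A hypothetical illustration, not part of the patch; it assumes the WriteEntity(Table) constructor from org.apache.hadoop.hive.ql.hooks, and the table name is made up:

import java.util.HashSet;

import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Table;

public class RuntimeOutputs {
  public static void main(String[] args) {
    // The plan hands this set to its tasks; they may grow it during execution.
    HashSet<WriteEntity> outputs = new HashSet<WriteEntity>();

    // Suppose a task only learns at run time which table it wrote to.
    Table written = new Table("table_discovered_at_runtime");
    outputs.add(new WriteEntity(written));

    System.out.println(outputs.size()); // prints 1
  }
}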