Index: metastore/src/model/package.jdo =================================================================== --- metastore/src/model/package.jdo (revision 1148945) +++ metastore/src/model/package.jdo (working copy) @@ -191,17 +191,17 @@ - + - + - + - + @@ -214,6 +214,15 @@ + + + + + + + + + Index: metastore/src/model/org/apache/hadoop/hive/metastore/model/MStorageDescriptor.java =================================================================== --- metastore/src/model/org/apache/hadoop/hive/metastore/model/MStorageDescriptor.java (revision 1148945) +++ metastore/src/model/org/apache/hadoop/hive/metastore/model/MStorageDescriptor.java (working copy) @@ -22,7 +22,7 @@ import java.util.Map; public class MStorageDescriptor { - private List cols; + private MColumnDescriptor cd; private String location; private String inputFormat; private String outputFormat; @@ -32,12 +32,12 @@ private List bucketCols; private List sortCols; private Map parameters; - + public MStorageDescriptor() {} - + /** - * @param cols + * @param cd * @param location * @param inputFormat * @param outputFormat @@ -48,10 +48,10 @@ * @param sortOrder * @param parameters */ - public MStorageDescriptor(List cols, String location, String inputFormat, + public MStorageDescriptor(MColumnDescriptor cd, String location, String inputFormat, String outputFormat, boolean isCompressed, int numBuckets, MSerDeInfo serDeInfo, List bucketCols, List sortOrder, Map parameters) { - this.cols = cols; + this.cd = cd; this.location = location; this.inputFormat = inputFormat; this.outputFormat = outputFormat; @@ -163,17 +163,17 @@ } /** - * @return the cols + * @return the column descriptor */ - public List getCols() { - return cols; + public MColumnDescriptor getCD() { + return cd; } /** - * @param cols the cols to set + * @param cd the Column Descriptor to set */ - public void setCols(List cols) { - this.cols = cols; + public void setCD(MColumnDescriptor cd) { + this.cd = cd; } /** Index: 
metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java =================================================================== --- metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (revision 1148945) +++ metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (working copy) @@ -79,6 +79,7 @@ import org.apache.hadoop.hive.metastore.api.UnknownDBException; import org.apache.hadoop.hive.metastore.api.UnknownPartitionException; import org.apache.hadoop.hive.metastore.api.UnknownTableException; +import org.apache.hadoop.hive.metastore.model.MColumnDescriptor; import org.apache.hadoop.hive.metastore.model.MDBPrivilege; import org.apache.hadoop.hive.metastore.model.MDatabase; import org.apache.hadoop.hive.metastore.model.MFieldSchema; @@ -701,7 +702,17 @@ if (partColGrants != null && partColGrants.size() > 0) { pm.deletePersistentAll(partColGrants); } - pm.deletePersistentAll(listMPartitions(dbName, tableName, -1)); + + //call dropPartitionCommon on each of the table's partitions (fetched once) to follow the + // procedure for cleanly dropping partitions. + List<MPartition> partsToDelete = listMPartitions(dbName, tableName, -1); + if (partsToDelete != null) { + for (MPartition mpart : partsToDelete) { + dropPartitionCommon(mpart); + } + } + + dropStorageDescriptorCleanly(tbl.getSd()); // then remove the table pm.deletePersistentAll(tbl); } @@ -885,6 +896,7 @@ } } + //A new table is always created with a new column descriptor return new MTable(tbl.getTableName().toLowerCase(), mdb, convertToMStorageDescriptor(tbl.getSd()), tbl.getOwner(), tbl .getCreateTime(), tbl.getLastAccessTime(), tbl.getRetention(), @@ -955,6 +967,18 @@ .getParameters()); } + /** + * Given a list of model field schemas, create a new model column descriptor. 
+ * @param cols the columns the column descriptor contains + * @return a new column descriptor db-backed object, or null if cols is null + */ + private MColumnDescriptor createNewMColumnDescriptor(List cols) { + if (cols == null) { + return null; + } + return new MColumnDescriptor(cols); + } + // MSD and SD should be same objects. Not sure how to make then same right now // MSerdeInfo *& SerdeInfo should be same as well private StorageDescriptor convertToStorageDescriptor(MStorageDescriptor msd, @@ -963,7 +987,7 @@ if (msd == null) { return null; } - return new StorageDescriptor(noFS ? null: convertToFieldSchemas(msd.getCols()), + return new StorageDescriptor((noFS || msd.getCD() == null) ? null : convertToFieldSchemas(msd.getCD().getCols()), msd.getLocation(), msd.getInputFormat(), msd.getOutputFormat(), msd .isCompressed(), msd.getNumBuckets(), converToSerDeInfo(msd .getSerDeInfo()), msd.getBucketCols(), convertToOrders(msd @@ -975,12 +999,37 @@ return convertToStorageDescriptor(msd, false); } + /** + * Converts a storage descriptor to a db-backed storage descriptor. Creates a + * new db-backed column descriptor object for this SD. + * @param sd the storage descriptor to wrap in a db-backed object + * @return the storage descriptor db-backed object + * @throws MetaException + */ private MStorageDescriptor convertToMStorageDescriptor(StorageDescriptor sd) throws MetaException { if (sd == null) { return null; } - return new MStorageDescriptor(convertToMFieldSchemas(sd.getCols()), sd + MColumnDescriptor mcd = createNewMColumnDescriptor(convertToMFieldSchemas(sd.getCols())); + return convertToMStorageDescriptor(sd, mcd); + } + + /** + * Converts a storage descriptor to a db-backed storage descriptor. It points the + * storage descriptor's column descriptor to the one passed as an argument, + * so it does not create a new mcolumn descriptor object. 
+ * @param sd the storage descriptor to wrap in a db-backed object + * @param mcd the db-backed column descriptor + * @return the db-backed storage descriptor object + * @throws MetaException + */ + private MStorageDescriptor convertToMStorageDescriptor(StorageDescriptor sd, + MColumnDescriptor mcd) throws MetaException { + if (sd == null) { + return null; + } + return new MStorageDescriptor(mcd, sd .getLocation(), sd.getInputFormat(), sd.getOutputFormat(), sd .isCompressed(), sd.getNumBuckets(), converToMSerDeInfo(sd .getSerdeInfo()), sd.getBucketCols(), @@ -1096,10 +1145,25 @@ throw new InvalidObjectException( "Partition doesn't have a valid table or database name"); } + + //if this partition's set of columns is the same as the parent table's, + //use the parent table's, so we do not create a duplicate column descriptor, + //thereby saving space + MStorageDescriptor msd; + if (mt.getSd() != null && mt.getSd().getCD() != null && + mt.getSd().getCD().getCols() != null && + part.getSd() != null && + convertToFieldSchemas(mt.getSd().getCD().getCols()). 
+ equals(part.getSd().getCols())) { + msd = convertToMStorageDescriptor(part.getSd(), mt.getSd().getCD()); + } else { + msd = convertToMStorageDescriptor(part.getSd()); + } + return new MPartition(Warehouse.makePartName(convertToFieldSchemas(mt .getPartitionKeys()), part.getValues()), mt, part.getValues(), part .getCreateTime(), part.getLastAccessTime(), - convertToMStorageDescriptor(part.getSd()), part.getParameters()); + msd, part.getParameters()); } private Partition convertToPart(MPartition mpart) throws MetaException { @@ -1122,33 +1186,58 @@ mpart.getParameters()); } + @Override public boolean dropPartition(String dbName, String tableName, List part_vals) throws MetaException { boolean success = false; try { openTransaction(); MPartition part = getMPartition(dbName, tableName, part_vals); + dropPartitionCommon(part); + success = commitTransaction(); + } finally { + if (!success) { + rollbackTransaction(); + } + } + return success; + } + + /** + * Drop an MPartition and cascade deletes (e.g., delete partition privilege grants, + * drop the storage descriptor cleanly, etc.) 
+ * @param part - the MPartition to drop; if null, nothing is deleted and the transaction is still committed + * @return whether the transaction committed successfully + */ + private boolean dropPartitionCommon(MPartition part) { + boolean success = false; + try { + openTransaction(); if (part != null) { List schemas = part.getTable().getPartitionKeys(); List colNames = new ArrayList(); for (MFieldSchema col: schemas) { colNames.add(col.getName()); } - String partName = FileUtils.makePartName(colNames, part_vals); + String partName = FileUtils.makePartName(colNames, part.getValues()); List partGrants = listPartitionGrants( - dbName, tableName, partName); + part.getTable().getDatabase().getName(), + part.getTable().getTableName(), + partName); if (partGrants != null && partGrants.size() > 0) { pm.deletePersistentAll(partGrants); } List partColumnGrants = listPartitionAllColumnGrants( - dbName, tableName, partName); + part.getTable().getDatabase().getName(), + part.getTable().getTableName(), + partName); if (partColumnGrants != null && partColumnGrants.size() > 0) { pm.deletePersistentAll(partColumnGrants); } - + dropStorageDescriptorCleanly(part.getSd()); pm.deletePersistent(part); } success = commitTransaction(); @@ -1658,7 +1747,9 @@ oldt.setTableName(newt.getTableName().toLowerCase()); oldt.setParameters(newt.getParameters()); oldt.setOwner(newt.getOwner()); - oldt.setSd(newt.getSd()); + //fully copy over the contents of new SD into old SD, + // so we don't create an extra SD in the metastore db that has no references. 
+ fullCopyMSD(newt.getSd(), oldt.getSd()); oldt.setDatabase(newt.getDatabase()); oldt.setRetention(newt.getRetention()); oldt.setPartitionKeys(newt.getPartitionKeys()); @@ -1737,7 +1828,11 @@ private void copyMSD(MStorageDescriptor newSd, MStorageDescriptor oldSd) { oldSd.setLocation(newSd.getLocation()); - oldSd.setCols(newSd.getCols()); + MColumnDescriptor oldCD = oldSd.getCD(); + //Point oldSd at the new CD first, so oldSd itself no longer counts as a reference to oldCD; + // then, if oldCD does not have any more references, delete it from the backend db + oldSd.setCD(newSd.getCD()); + removeUnusedColumnDescriptor(oldCD); oldSd.setBucketCols(newSd.getBucketCols()); oldSd.setCompressed(newSd.isCompressed()); oldSd.setInputFormat(newSd.getInputFormat()); @@ -1749,6 +1844,88 @@ oldSd.getSerDeInfo().setParameters(newSd.getSerDeInfo().getParameters()); } + /** + * copy over all fields from newSd to oldSd + * @param newSd the new storage descriptor + * @param oldSd the old descriptor that gets copied over; NOTE(review): when null, the replacement SD made persistent here is only assigned to the local parameter and is never attached to the caller's table -- confirm callers always pass a non-null SD + */ + private void fullCopyMSD(MStorageDescriptor newSd, MStorageDescriptor oldSd) { + if (oldSd == null) { + oldSd = new MStorageDescriptor(); + pm.makePersistent(oldSd); + } + copyMSD(newSd, oldSd); + oldSd.setSortCols(newSd.getSortCols()); + oldSd.setParameters(newSd.getParameters()); + } + + /** + * Checks if a column descriptor has any remaining references by storage descriptors + * in the db. If it does not, then delete the CD. If it does, then do nothing. + * @param oldCD the column descriptor to delete if it is no longer referenced anywhere + */ + private void removeUnusedColumnDescriptor(MColumnDescriptor oldCD) { + if (oldCD == null) { + return; + } + LOG.debug("execute removeUnusedColumnDescriptor"); + //if no other SD references this CD, we can throw it out. 
+ List referencedSDs = listStorageDescriptorsWithCD(oldCD); + + if (referencedSDs != null && referencedSDs.isEmpty()) { + LOG.debug("no references were found to CD with id: " + JDOHelper.getObjectId(oldCD)); + boolean success = false; + try { + openTransaction(); + pm.retrieve(oldCD); + pm.deletePersistent(oldCD); + success = commitTransaction(); + LOG.debug("successfully deleted a CD in removeUnusedColumnDescriptor"); + } finally { + if (!success) { + rollbackTransaction(); + } + } + } + } + + private void dropStorageDescriptorCleanly(MStorageDescriptor msd) { + if (msd == null || msd.getCD() == null) { + return; + } + + MColumnDescriptor mcd = msd.getCD(); + msd.setCD(null); + removeUnusedColumnDescriptor(mcd); + } + + /** + * Query, inside its own transaction, for the storage descriptors whose cd field equals a particular Column Descriptor + * @param oldCD the column descriptor to get storage descriptors for + * @return the matching storage descriptors, each retrieved via retrieveAll before the transaction commits + */ + private List listStorageDescriptorsWithCD(MColumnDescriptor oldCD) { + boolean success = false; + List sds = null; + try { + openTransaction(); + LOG.debug("Executing listStorageDescriptorsWithCD"); + Query query = pm.newQuery(MStorageDescriptor.class, + "this.cd == inCD"); + query.declareParameters("MColumnDescriptor inCD"); + sds = (List) query.execute(oldCD); + LOG.debug("Done executing query for listStorageDescriptorsWithCD"); + pm.retrieveAll(sds); + success = commitTransaction(); + LOG.debug("Done retrieving all objects for listStorageDescriptorsWithCD"); + } finally { + if (!success) { + rollbackTransaction(); + } + } + return sds; + } + @Override public boolean addIndex(Index index) throws InvalidObjectException, MetaException {