diff --git metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 3f85ca6..bd9802e 100644 --- metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -2311,14 +2311,6 @@ public boolean equals(Object obj) { + dbName + "." + tblName + ": " + part); } - boolean shouldAdd = startAddPartition(ms, part, ifNotExists); - if (!shouldAdd) { - existingParts.add(part); - LOG.info("Not adding partition " + part + " as it already exists"); - continue; - } - - partFutures.add(threadPool.submit(new Callable() { @Override public Partition call() throws Exception { @@ -2476,11 +2468,7 @@ private int add_partitions_pspec_core( throw new MetaException("Partition does not belong to target table " + dbName + "." + tblName + ": " + part); } - boolean shouldAdd = startAddPartition(ms, part, ifNotExists); - if (!shouldAdd) { - LOG.info("Not adding partition " + part + " as it already exists"); - continue; - } + partFutures.add(threadPool.submit(new Callable() { @Override public Object call() throws Exception { boolean madeDir = createLocationForAddedPartition(table, part); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index a59b781..ca459a9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -1766,7 +1766,7 @@ private void checkArchiveProperty(int partSpecLevel, } private void msckAddPartitionsOneByOne(Hive db, Table table, - List partsNotInMs, List repairOutput) { + Set partsNotInMs, List repairOutput) { for (CheckResult.PartitionResult part : partsNotInMs) { try { db.createPartition(table, Warehouse.makeSpecFromName(part.getPartitionName())); @@ -1825,7 +1825,7 @@ private int msck(Hive db, MsckDesc msckDesc) { HiveMetaStoreChecker checker = new HiveMetaStoreChecker(db); String[] names = Utilities.getDbTableName(msckDesc.getTableName()); checker.checkMetastore(names[0], names[1], msckDesc.getPartSpecs(), result); - List partsNotInMs = result.getPartitionsNotInMs(); + Set partsNotInMs = result.getPartitionsNotInMs(); if (msckDesc.isRepairPartitions() && !partsNotInMs.isEmpty()) { AbstractList vals = null; String settingStr = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION); @@ -1957,7 +1957,7 @@ private int msck(Hive db, MsckDesc msckDesc) { * @throws IOException * In case the writing fails */ - private boolean writeMsckResult(List result, String msg, + private boolean writeMsckResult(Set result, String msg, Writer out, boolean wrote) throws IOException { if (!result.isEmpty()) { diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java index ec9deeb..36b9250 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java @@ -17,23 +17,23 @@ */ package org.apache.hadoop.hive.ql.metadata; -import java.util.ArrayList; -import java.util.List; +import java.util.Set; +import java.util.TreeSet; /** * Result class used by the HiveMetaStoreChecker. */ public class CheckResult { - private List tablesNotOnFs = new ArrayList(); - private List tablesNotInMs = new ArrayList(); - private List partitionsNotOnFs = new ArrayList(); - private List partitionsNotInMs = new ArrayList(); + private Set tablesNotOnFs = new TreeSet(); + private Set tablesNotInMs = new TreeSet(); + private Set partitionsNotOnFs = new TreeSet(); + private Set partitionsNotInMs = new TreeSet(); /** * @return a list of tables not found on the filesystem. */ - public List getTablesNotOnFs() { + public Set getTablesNotOnFs() { return tablesNotOnFs; } @@ -41,14 +41,14 @@ * @param tablesNotOnFs * a list of tables not found on the filesystem. */ - public void setTablesNotOnFs(List tablesNotOnFs) { + public void setTablesNotOnFs(Set tablesNotOnFs) { this.tablesNotOnFs = tablesNotOnFs; } /** * @return a list of tables not found in the metastore. */ - public List getTablesNotInMs() { + public Set getTablesNotInMs() { return tablesNotInMs; } @@ -56,14 +56,14 @@ public void setTablesNotOnFs(List tablesNotOnFs) { * @param tablesNotInMs * a list of tables not found in the metastore. */ - public void setTablesNotInMs(List tablesNotInMs) { + public void setTablesNotInMs(Set tablesNotInMs) { this.tablesNotInMs = tablesNotInMs; } /** * @return a list of partitions not found on the fs */ - public List getPartitionsNotOnFs() { + public Set getPartitionsNotOnFs() { return partitionsNotOnFs; } @@ -71,14 +71,14 @@ public void setTablesNotInMs(List tablesNotInMs) { * @param partitionsNotOnFs * a list of partitions not found on the fs */ - public void setPartitionsNotOnFs(List partitionsNotOnFs) { + public void setPartitionsNotOnFs(Set partitionsNotOnFs) { this.partitionsNotOnFs = partitionsNotOnFs; } /** * @return a list of partitions not found in the metastore */ - public List getPartitionsNotInMs() { + public Set getPartitionsNotInMs() { return partitionsNotInMs; } @@ -86,7 +86,7 @@ public void setPartitionsNotOnFs(List partitionsNotOnFs) { * @param partitionsNotInMs * a list of partitions not found in the metastore */ - public void setPartitionsNotInMs(List partitionsNotInMs) { + public void setPartitionsNotInMs(Set partitionsNotInMs) { this.partitionsNotInMs = partitionsNotInMs; } diff --git ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java index 34b76b8..b127450 100644 --- ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java +++ ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.HashSet; import java.util.LinkedList; import java.util.List; @@ -33,6 +32,9 @@ import java.util.concurrent.Future; import java.util.concurrent.ThreadPoolExecutor; +import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FileStatus; @@ -113,10 +115,6 @@ public void checkMetastore(String dbName, String tableName, // check the specified partitions checkTable(dbName, tableName, partitions, result); } - Collections.sort(result.getPartitionsNotInMs()); - Collections.sort(result.getPartitionsNotOnFs()); - Collections.sort(result.getTablesNotInMs()); - Collections.sort(result.getTablesNotOnFs()); } catch (MetaException e) { throw new HiveException(e); } catch (TException e) { @@ -317,11 +315,17 @@ void findUnknownPartitions(Table table, Set partPaths, // remove the partition paths we know about allPartDirs.removeAll(partPaths); + Set partColNames = Sets.newHashSet(); + for(FieldSchema fSchema : table.getPartCols()) { + partColNames.add(fSchema.getName()); + } + // we should now only have the unexpected folders left for (Path partPath : allPartDirs) { FileSystem fs = partPath.getFileSystem(conf); String partitionName = getPartitionName(fs.makeQualified(tablePath), - partPath); + partPath, partColNames); + LOG.debug("PartitionName: " + partitionName); if (partitionName != null) { PartitionResult pr = new PartitionResult(); @@ -331,6 +335,7 @@ void findUnknownPartitions(Table table, Set partPaths, result.getPartitionsNotInMs().add(pr); } } + LOG.debug("Number of partitions not in metastore : " + result.getPartitionsNotInMs().size()); } /** @@ -340,18 +345,33 @@ void findUnknownPartitions(Table table, Set partPaths, * Path of the table. * @param partitionPath * Path of the partition. + * @param partCols + * Set of partition columns from table definition * @return Partition name, for example partitiondate=2008-01-01 */ - private String getPartitionName(Path tablePath, Path partitionPath) { + static String getPartitionName(Path tablePath, Path partitionPath, + Set partCols) { String result = null; Path currPath = partitionPath; + LOG.debug("tablePath:" + tablePath + ", partCols: " + partCols); + while (currPath != null && !tablePath.equals(currPath)) { - if (result == null) { - result = currPath.getName(); - } else { - result = currPath.getName() + Path.SEPARATOR + result; + // format: partition=p_val + // Add only when table partition colName matches + String[] parts = currPath.getName().split("="); + if (parts != null && parts.length > 0) { + Preconditions.checkArgument(parts.length == 2, + (result + " is not a valid partition name")); + + String partitionName = parts[0]; + if (partCols.contains(partitionName)) { + if (result == null) { + result = currPath.getName(); + } else { + result = currPath.getName() + Path.SEPARATOR + result; + } + } } - currPath = currPath.getParent(); } return result; diff --git ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java index 3f26bcd..cda6e30 100644 --- ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java +++ ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; +import com.google.common.collect.Lists; import junit.framework.TestCase; import org.apache.hadoop.fs.FileSystem; @@ -111,19 +112,19 @@ public void testTableCheck() throws HiveException, MetaException, CheckResult result = new CheckResult(); checker.checkMetastore(dbName, null, null, result); // we haven't added anything so should return an all ok - assertEquals(Collections.emptyList(), result.getTablesNotInMs()); - assertEquals(Collections.emptyList(), result.getTablesNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getTablesNotInMs()); + assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); // check table only, should not exist in ms result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); assertEquals(1, result.getTablesNotInMs().size()); - assertEquals(tableName, result.getTablesNotInMs().get(0)); - assertEquals(Collections.emptyList(), result.getTablesNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotInMs()); + assertEquals(tableName, result.getTablesNotInMs().iterator().next()); + assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); Database db = new Database(); db.setName(dbName); @@ -139,18 +140,18 @@ public void testTableCheck() throws HiveException, MetaException, // first check all (1) tables result = new CheckResult(); checker.checkMetastore(dbName, null, null, result); - assertEquals(Collections.emptyList(), result.getTablesNotInMs()); - assertEquals(Collections.emptyList(), result.getTablesNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getTablesNotInMs()); + assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); // then let's check the one we know about result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); - assertEquals(Collections.emptyList(), result.getTablesNotInMs()); - assertEquals(Collections.emptyList(), result.getTablesNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getTablesNotInMs()); + assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); // remove the table folder fs = table.getPath().getFileSystem(hive.getConf()); @@ -159,11 +160,11 @@ public void testTableCheck() throws HiveException, MetaException, // now this shouldn't find the path on the fs result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); - assertEquals(Collections.emptyList(), result.getTablesNotInMs());; + assertEquals(Collections.emptySet(), result.getTablesNotInMs());; assertEquals(1, result.getTablesNotOnFs().size()); - assertEquals(tableName, result.getTablesNotOnFs().get(0)); - assertEquals(Collections.emptyList(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotInMs()); + assertEquals(tableName, result.getTablesNotOnFs().iterator().next()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); // put it back and one additional table fs.mkdirs(table.getPath()); @@ -176,10 +177,10 @@ public void testTableCheck() throws HiveException, MetaException, result = new CheckResult(); checker.checkMetastore(dbName, null, null, result); assertEquals(1, result.getTablesNotInMs().size()); - assertEquals(fakeTable.getName(), result.getTablesNotInMs().get(0)); - assertEquals(Collections.emptyList(), result.getTablesNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotInMs()); + assertEquals(fakeTable.getName(), Lists.newArrayList(result.getTablesNotInMs()).get(0)); + assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); // create a new external table hive.dropTable(dbName, tableName); @@ -189,10 +190,10 @@ public void testTableCheck() throws HiveException, MetaException, // should return all ok result = new CheckResult(); checker.checkMetastore(dbName, null, null, result); - assertEquals(Collections.emptyList(), result.getTablesNotInMs()); - assertEquals(Collections.emptyList(), result.getTablesNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getTablesNotInMs()); + assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); } public void testPartitionsCheck() throws HiveException, MetaException, @@ -218,13 +219,26 @@ public void testPartitionsCheck() throws HiveException, MetaException, CheckResult result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); // all is well - assertEquals(Collections.emptyList(), result.getTablesNotInMs()); - assertEquals(Collections.emptyList(), result.getTablesNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getTablesNotInMs()); + assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); List partitions = hive.getPartitions(table); assertEquals(2, partitions.size()); + // add a fake partition dir on fs to ensure that it does not get added + fs = partitions.get(0).getDataLocation().getFileSystem(hive.getConf()); + Path fakePart = new Path(table.getDataLocation().toString(), + "fakedate=2009-01-01/fakecity=sanjose"); + fs.mkdirs(fakePart); + fs.deleteOnExit(fakePart); + checker.checkMetastore(dbName, tableName, null, result); + assertEquals(Collections.emptySet(), result.getTablesNotInMs()); + assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); + assertEquals(0, result.getPartitionsNotOnFs().size()); + assertEquals(0, result.getPartitionsNotInMs().size()); + assertEquals(2, partitions.size()); //no additional partitions got added + Partition partToRemove = partitions.get(0); // As this partition (partdate=2008-01-01/partcity=london) is the only // partition under (partdate=2008-01-01) @@ -236,27 +250,24 @@ public void testPartitionsCheck() throws HiveException, MetaException, result = new CheckResult(); checker.checkMetastore(dbName, tableName, null, result); // missing one partition on fs - assertEquals(Collections.emptyList(), result.getTablesNotInMs()); - assertEquals(Collections.emptyList(), result.getTablesNotOnFs()); + assertEquals(Collections.emptySet(), result.getTablesNotInMs()); + assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); assertEquals(1, result.getPartitionsNotOnFs().size()); - assertEquals(partToRemove.getName(), result.getPartitionsNotOnFs().get(0) + assertEquals(partToRemove.getName(), result.getPartitionsNotOnFs().iterator().next() .getPartitionName()); - assertEquals(partToRemove.getTable().getTableName(), result - .getPartitionsNotOnFs().get(0).getTableName()); - assertEquals(Collections.emptyList(), result.getPartitionsNotInMs()); + assertEquals(partToRemove.getTable().getTableName(), + result.getPartitionsNotOnFs().iterator().next().getTableName()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); List> partsCopy = new ArrayList>(); partsCopy.add(partitions.get(1).getSpec()); // check only the partition that exists, all should be well result = new CheckResult(); checker.checkMetastore(dbName, tableName, partsCopy, result); - assertEquals(Collections.emptyList(), result.getTablesNotInMs()); - assertEquals(Collections.emptyList(), result.getTablesNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotInMs()); - - // put the other one back - fs.mkdirs(partToRemovePath); + assertEquals(Collections.emptySet(), result.getTablesNotInMs()); + assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); // old test is moved to msck_repair_2.q @@ -265,12 +276,11 @@ public void testPartitionsCheck() throws HiveException, MetaException, hive.createTable(table); result = new CheckResult(); checker.checkMetastore(dbName, null, null, result); - assertEquals(Collections.emptyList(), result.getTablesNotInMs()); - assertEquals(Collections.emptyList(), result.getTablesNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptyList(), result.getPartitionsNotInMs()); //--0e + assertEquals(Collections.emptySet(), result.getTablesNotInMs()); + assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); //--0e System.err.println("Test completed - partition check"); - } public void testDataDeletion() throws HiveException, MetaException,