diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index a04ef38fbf..22d13a96a1 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -4302,17 +4302,29 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "Merge adjacent joins into a single n-way join"), HIVE_LOG_N_RECORDS("hive.log.every.n.records", 0L, new RangeValidator(0L, null), "If value is greater than 0 logs in fixed intervals of size n rather than exponentially."), + /** + * @deprecated Use MetastoreConf.MSCK_PATH_VALIDATION + */ + @Deprecated HIVE_MSCK_PATH_VALIDATION("hive.msck.path.validation", "throw", new StringSet("throw", "skip", "ignore"), "The approach msck should take with HDFS " + "directories that are partition-like but contain unsupported characters. 'throw' (an " + "exception) is the default; 'skip' will skip the invalid directories and still repair the" + " others; 'ignore' will skip the validation (legacy behavior, causes bugs in many cases)"), + /** + * @deprecated Use MetastoreConf.MSCK_REPAIR_BATCH_SIZE + */ + @Deprecated HIVE_MSCK_REPAIR_BATCH_SIZE( "hive.msck.repair.batch.size", 3000, "Batch size for the msck repair command. If the value is greater than zero,\n " + "it will execute batch wise with the configured batch size. In case of errors while\n" + "adding unknown partitions the batch size is automatically reduced by half in the subsequent\n" + "retry attempt. The default value is 3000 which means it will execute in the batches of 3000."), + /** + * @deprecated Use MetastoreConf.MSCK_REPAIR_BATCH_MAX_RETRIES + */ + @Deprecated HIVE_MSCK_REPAIR_BATCH_MAX_RETRIES("hive.msck.repair.batch.max.retries", 4, "Maximum number of retries for the msck repair command when adding unknown partitions.\n " + "If the value is greater than zero it will retry adding unknown partitions until the maximum\n" diff --git a/hbase-handler/src/test/results/positive/external_table_ppd.q.out b/hbase-handler/src/test/results/positive/external_table_ppd.q.out index edcbe7e53c..22c8b7045d 100644 --- a/hbase-handler/src/test/results/positive/external_table_ppd.q.out +++ b/hbase-handler/src/test/results/positive/external_table_ppd.q.out @@ -60,6 +60,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"bigint_col\":\"true\",\"boolean_col\":\"true\",\"double_col\":\"true\",\"float_col\":\"true\",\"int_col\":\"true\",\"key\":\"true\",\"smallint_col\":\"true\",\"tinyint_col\":\"true\"}} EXTERNAL TRUE bucketing_version 2 + discover.partitions true external.table.purge true hbase.table.default.storage.type binary hbase.table.name t_hive diff --git a/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out b/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out index 1209c880f7..bf1a89dbcc 100644 --- a/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out +++ b/hbase-handler/src/test/results/positive/hbase_binary_storage_queries.q.out @@ -60,6 +60,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"bigint_col\":\"true\",\"boolean_col\":\"true\",\"double_col\":\"true\",\"float_col\":\"true\",\"int_col\":\"true\",\"key\":\"true\",\"smallint_col\":\"true\",\"tinyint_col\":\"true\"}} EXTERNAL TRUE bucketing_version 2 + discover.partitions true external.table.purge true hbase.table.default.storage.type binary hbase.table.name t_hive 
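The @deprecated javadoc added to HiveConf above points hive.msck.path.validation, hive.msck.repair.batch.size and hive.msck.repair.batch.max.retries at MetastoreConf replacements. A minimal sketch of the intended migration for callers, assuming the MetastoreConf.ConfVars entries named in that javadoc exist once this patch is applied (the two numeric settings migrate the same way):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;

public class MsckConfMigrationSketch {
  /** Shows the deprecated HiveConf lookup next to its metastore-side replacement. */
  static void showPathValidationSetting(Configuration conf) {
    // Deprecated entry, still honoured for existing hive-site.xml files.
    String legacy = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION);
    // Replacement named by the new @deprecated javadoc (assumed ConfVars member name).
    String replacement = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.MSCK_PATH_VALIDATION);
    System.out.println("hive.msck.path.validation=" + legacy + ", metastore-side value=" + replacement);
  }
}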
@@ -242,6 +243,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"bigint_col\":\"true\",\"boolean_col\":\"true\",\"double_col\":\"true\",\"float_col\":\"true\",\"int_col\":\"true\",\"key\":\"true\",\"smallint_col\":\"true\",\"tinyint_col\":\"true\"}} EXTERNAL TRUE bucketing_version 2 + discover.partitions true hbase.table.name t_hive numFiles 0 numRows 0 diff --git a/hbase-handler/src/test/results/positive/hbase_ddl.q.out b/hbase-handler/src/test/results/positive/hbase_ddl.q.out index 25dd66bbc3..ebfdf00be1 100644 --- a/hbase-handler/src/test/results/positive/hbase_ddl.q.out +++ b/hbase-handler/src/test/results/positive/hbase_ddl.q.out @@ -118,6 +118,7 @@ Table Type: EXTERNAL_TABLE Table Parameters: EXTERNAL TRUE bucketing_version 2 + discover.partitions true external.table.purge true hbase.mapred.output.outputtable kkk hbase.table.name hbase_table_0 @@ -167,6 +168,7 @@ Table Type: EXTERNAL_TABLE Table Parameters: EXTERNAL TRUE bucketing_version 2 + discover.partitions true external.table.purge true hbase.table.name hbase_table_0 #### A masked pattern was here #### diff --git a/hbase-handler/src/test/results/positive/hbase_queries.q.out b/hbase-handler/src/test/results/positive/hbase_queries.q.out index 5ee1eb1621..75ae9b34d9 100644 --- a/hbase-handler/src/test/results/positive/hbase_queries.q.out +++ b/hbase-handler/src/test/results/positive/hbase_queries.q.out @@ -983,6 +983,7 @@ WITH SERDEPROPERTIES ( 'hbase.columns.mapping'='cf:string', 'serialization.format'='1') TBLPROPERTIES ( + 'discover.partitions'='true', 'hbase.table.name'='hbase_table_0', #### A masked pattern was here #### PREHOOK: query: DROP TABLE IF EXISTS hbase_table_9 diff --git a/hbase-handler/src/test/results/positive/hbasestats.q.out b/hbase-handler/src/test/results/positive/hbasestats.q.out index 5a4aea9667..5143522a8d 100644 --- a/hbase-handler/src/test/results/positive/hbasestats.q.out +++ b/hbase-handler/src/test/results/positive/hbasestats.q.out @@ -42,6 +42,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"country\":\"true\",\"country_id\":\"true\",\"key\":\"true\",\"state\":\"true\"}} EXTERNAL TRUE bucketing_version 2 + discover.partitions true external.table.purge true numFiles 0 numRows 0 @@ -136,6 +137,7 @@ Table Type: EXTERNAL_TABLE Table Parameters: EXTERNAL TRUE bucketing_version 2 + discover.partitions true external.table.purge true #### A masked pattern was here #### numFiles 0 @@ -203,6 +205,7 @@ Table Type: EXTERNAL_TABLE Table Parameters: EXTERNAL TRUE bucketing_version 2 + discover.partitions true external.table.purge true #### A masked pattern was here #### numFiles 0 @@ -262,6 +265,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} EXTERNAL TRUE bucketing_version 2 + discover.partitions true external.table.purge true #### A masked pattern was here #### numFiles 0 @@ -371,6 +375,7 @@ Table Type: EXTERNAL_TABLE Table Parameters: EXTERNAL TRUE bucketing_version 2 + discover.partitions true external.table.purge true #### A masked pattern was here #### numFiles 0 diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java index c4658f92f9..85d8529c50 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java @@ -1675,7 +1675,6 @@ 
public void testTableProperties() throws Exception { Assert.assertNotEquals("Unexpected default compression size", 2700, OrcConf.BUFFER_SIZE.getDefaultValue()); - // Insert one more row - this should trigger hive.compactor.delta.pct.threshold to be reached for ttp2 executeStatementOnDriver("insert into " + tblName1 + " values (6, 'f')", driver); executeStatementOnDriver("insert into " + tblName2 + " values (6, 'f')", driver); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java index c072133f68..ee7797d5fa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java @@ -77,7 +77,10 @@ import org.apache.hadoop.hive.metastore.DefaultHiveMetaHook; import org.apache.hadoop.hive.metastore.HiveMetaHook; import org.apache.hadoop.hive.metastore.HiveMetaStoreUtils; +import org.apache.hadoop.hive.metastore.Msck; +import org.apache.hadoop.hive.metastore.MsckInfo; import org.apache.hadoop.hive.metastore.PartitionDropOptions; +import org.apache.hadoop.hive.metastore.PartitionManagementTask; import org.apache.hadoop.hive.metastore.StatObjectConverter; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; @@ -152,13 +155,13 @@ import org.apache.hadoop.hive.ql.lockmgr.HiveLockObject.HiveLockObjectData; import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager; import org.apache.hadoop.hive.ql.metadata.CheckConstraint; -import org.apache.hadoop.hive.ql.metadata.CheckResult; +import org.apache.hadoop.hive.metastore.CheckResult; import org.apache.hadoop.hive.ql.metadata.DefaultConstraint; import org.apache.hadoop.hive.ql.metadata.ForeignKeyInfo; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.HiveMaterializedViewsRegistry; -import org.apache.hadoop.hive.ql.metadata.HiveMetaStoreChecker; +import org.apache.hadoop.hive.metastore.HiveMetaStoreChecker; import org.apache.hadoop.hive.ql.metadata.InvalidTableException; import org.apache.hadoop.hive.ql.metadata.NotNullConstraint; import org.apache.hadoop.hive.ql.metadata.Partition; @@ -2126,279 +2129,22 @@ private int compact(Hive db, AlterTableSimpleDesc desc) throws HiveException { * @return Returns 0 when execution succeeds and above 0 if it fails. 
*/ private int msck(Hive db, MsckDesc msckDesc) { - CheckResult result = new CheckResult(); - List repairOutput = new ArrayList(); + Msck msck; try { - HiveMetaStoreChecker checker = new HiveMetaStoreChecker(db); + msck = new Msck( false, false); + msck.init(db.getConf()); String[] names = Utilities.getDbTableName(msckDesc.getTableName()); - - // checkMetastore call will fill in result with partitions that are present in filesystem - // and missing in metastore - accessed through getPartitionsNotInMs - // And partitions that are not present in filesystem and metadata exists in metastore - - // accessed through getPartitionNotOnFS - checker.checkMetastore(names[0], names[1], msckDesc.getPartSpecs(), result); - Set partsNotInMs = result.getPartitionsNotInMs(); - Set partsNotInFs = result.getPartitionsNotOnFs(); - - if (msckDesc.isRepairPartitions()) { - // Repair metadata in HMS - - Table table = db.getTable(msckDesc.getTableName()); - int maxRetries = conf.getIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_MAX_RETRIES); - int decayingFactor = 2; - - if (msckDesc.isAddPartitions() && !partsNotInMs.isEmpty()) { - // MSCK called to add missing paritions into metastore and there are - // missing partitions. - - int batchSize = conf.getIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE); - if (batchSize == 0) { - //batching is not enabled. Try to add all the partitions in one call - batchSize = partsNotInMs.size(); - } - - AbstractList vals = null; - String settingStr = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION); - boolean doValidate = !("ignore".equals(settingStr)); - boolean doSkip = doValidate && "skip".equals(settingStr); - // The default setting is "throw"; assume doValidate && !doSkip means throw. - if (doValidate) { - // Validate that we can add partition without escaping. Escaping was originally intended - // to avoid creating invalid HDFS paths; however, if we escape the HDFS path (that we - // deem invalid but HDFS actually supports - it is possible to create HDFS paths with - // unprintable characters like ASCII 7), metastore will create another directory instead - // of the one we are trying to "repair" here. - Iterator iter = partsNotInMs.iterator(); - while (iter.hasNext()) { - CheckResult.PartitionResult part = iter.next(); - try { - vals = Warehouse.makeValsFromName(part.getPartitionName(), vals); - } catch (MetaException ex) { - throw new HiveException(ex); - } - for (String val : vals) { - String escapedPath = FileUtils.escapePathName(val); - assert escapedPath != null; - if (escapedPath.equals(val)) { - continue; - } - String errorMsg = "Repair: Cannot add partition " + msckDesc.getTableName() + ':' + - part.getPartitionName() + " due to invalid characters in the name"; - if (doSkip) { - repairOutput.add(errorMsg); - iter.remove(); - } else { - throw new HiveException(errorMsg); - } - } - } - } - try { - createPartitionsInBatches(db, repairOutput, partsNotInMs, table, batchSize, - decayingFactor, maxRetries); - } catch (Exception e) { - throw new HiveException(e); - } - } - - if (msckDesc.isDropPartitions() && !partsNotInFs.isEmpty()) { - // MSCK called to drop stale paritions from metastore and there are - // stale partitions. - - int batchSize = conf.getIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE); - if (batchSize == 0) { - //batching is not enabled. 
Try to drop all the partitions in one call - batchSize = partsNotInFs.size(); - } - - try { - dropPartitionsInBatches(db, repairOutput, partsNotInFs, table, batchSize, - decayingFactor, maxRetries); - } catch (Exception e) { - throw new HiveException(e); - } - } - } - } catch (HiveException e) { - LOG.warn("Failed to run metacheck: ", e); + MsckInfo msckInfo = new MsckInfo(SessionState.get().getCurrentCatalog(), names[0], + names[1], msckDesc.getPartSpecs(), msckDesc.getResFile(), + msckDesc.isRepairPartitions(), msckDesc.isAddPartitions(), msckDesc.isDropPartitions(), -1); + return msck.repair(msckInfo); + } catch (MetaException e) { + LOG.error("Unable to create msck instance.", e); return 1; - } catch (IOException e) { - LOG.warn("Failed to run metacheck: ", e); + } catch (SemanticException e) { + LOG.error("Msck failed.", e); return 1; - } finally { - BufferedWriter resultOut = null; - try { - Path resFile = new Path(msckDesc.getResFile()); - FileSystem fs = resFile.getFileSystem(conf); - resultOut = new BufferedWriter(new OutputStreamWriter(fs - .create(resFile))); - - boolean firstWritten = false; - firstWritten |= writeMsckResult(result.getTablesNotInMs(), - "Tables not in metastore:", resultOut, firstWritten); - firstWritten |= writeMsckResult(result.getTablesNotOnFs(), - "Tables missing on filesystem:", resultOut, firstWritten); - firstWritten |= writeMsckResult(result.getPartitionsNotInMs(), - "Partitions not in metastore:", resultOut, firstWritten); - firstWritten |= writeMsckResult(result.getPartitionsNotOnFs(), - "Partitions missing from filesystem:", resultOut, firstWritten); - for (String rout : repairOutput) { - if (firstWritten) { - resultOut.write(terminator); - } else { - firstWritten = true; - } - resultOut.write(rout); - } - } catch (IOException e) { - LOG.warn("Failed to save metacheck output: ", e); - return 1; - } finally { - if (resultOut != null) { - try { - resultOut.close(); - } catch (IOException e) { - LOG.warn("Failed to close output file: ", e); - return 1; - } - } - } - } - - return 0; - } - - @VisibleForTesting - void createPartitionsInBatches(Hive db, List repairOutput, - Set partsNotInMs, Table table, int batchSize, int decayingFactor, int maxRetries) - throws Exception { - String addMsgFormat = "Repair: Added partition to metastore " - + table.getTableName() + ":%s"; - Set batchWork = new HashSet<>(partsNotInMs); - new RetryUtilities.ExponentiallyDecayingBatchWork(batchSize, decayingFactor, maxRetries) { - @Override - public Void execute(int size) throws Exception { - while (!batchWork.isEmpty()) { - //get the current batch size - int currentBatchSize = size; - AddPartitionDesc apd = - new AddPartitionDesc(table.getDbName(), table.getTableName(), true); - //store the partitions temporarily until processed - List lastBatch = new ArrayList<>(currentBatchSize); - List addMsgs = new ArrayList<>(currentBatchSize); - //add the number of partitions given by the current batchsize - for (CheckResult.PartitionResult part : batchWork) { - if (currentBatchSize == 0) { - break; - } - apd.addPartition(Warehouse.makeSpecFromName(part.getPartitionName()), null); - lastBatch.add(part); - addMsgs.add(String.format(addMsgFormat, part.getPartitionName())); - currentBatchSize--; - } - db.createPartitions(apd); - // if last batch is successful remove it from partsNotInMs - batchWork.removeAll(lastBatch); - repairOutput.addAll(addMsgs); - } - return null; - } - }.run(); - } - - // Drops partitions in batches. 
partNotInFs is split into batches based on batchSize - // and dropped. The dropping will be through RetryUtilities which will retry when there is a - // failure after reducing the batchSize by decayingFactor. Retrying will cease when maxRetries - // limit is reached or batchSize reduces to 0, whichever comes earlier. - @VisibleForTesting - void dropPartitionsInBatches(Hive db, List repairOutput, - Set partsNotInFs, Table table, int batchSize, int decayingFactor, - int maxRetries) throws Exception { - String dropMsgFormat = - "Repair: Dropped partition from metastore " + table.getFullyQualifiedName() + ":%s"; - // Copy of partitions that will be split into batches - Set batchWork = new TreeSet<>(partsNotInFs); - - new RetryUtilities.ExponentiallyDecayingBatchWork(batchSize, decayingFactor, maxRetries) { - @Override - public Void execute(int size) throws Exception { - while (!batchWork.isEmpty()) { - int currentBatchSize = size; - - // to store the partitions that are currently being processed - List lastBatch = new ArrayList<>(currentBatchSize); - - // drop messages for the dropped partitions - List dropMsgs = new ArrayList<>(currentBatchSize); - - // Partitions to be dropped - List dropParts = new ArrayList<>(currentBatchSize); - - for (CheckResult.PartitionResult part : batchWork) { - // This batch is full: break out of for loop to execute - if (currentBatchSize == 0) { - break; - } - - dropParts.add(part.getPartitionName()); - - // Add the part to lastBatch to track the parition being dropped - lastBatch.add(part); - - // Update messages - dropMsgs.add(String.format(dropMsgFormat, part.getPartitionName())); - - // Decrement batch size. When this gets to 0, the batch will be executed - currentBatchSize--; - } - - // this call is deleting partitions that are already missing from filesystem - // so 3rd parameter (deleteData) is set to false - // msck is doing a clean up of hms. if for some reason the partition is already - // deleted, then it is good. So, the last parameter ifexists is set to true - db.dropPartitions(table, dropParts, false, true); - - // if last batch is successful remove it from partsNotInFs - batchWork.removeAll(lastBatch); - repairOutput.addAll(dropMsgs); - } - return null; - } - }.run(); - } - - /** - * Write the result of msck to a writer. - * - * @param result - * The result we're going to write - * @param msg - * Message to write. 
- * @param out - * Writer to write to - * @param wrote - * if any previous call wrote data - * @return true if something was written - * @throws IOException - * In case the writing fails - */ - private boolean writeMsckResult(Set result, String msg, - Writer out, boolean wrote) throws IOException { - - if (!result.isEmpty()) { - if (wrote) { - out.write(terminator); - } - - out.write(msg); - for (Object entry : result) { - out.write(separator); - out.write(entry.toString()); - } - return true; } - - return false; } /** @@ -4997,6 +4743,8 @@ private int createTableLike(Hive db, CreateTableLikeDesc crtTbl) throws Exceptio if (crtTbl.isExternal()) { tbl.setProperty("EXTERNAL", "TRUE"); tbl.setTableType(TableType.EXTERNAL_TABLE); + // partition discovery is on by default + tbl.setProperty(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true"); } tbl.setFields(oldtbl.getCols()); @@ -5094,6 +4842,8 @@ private int createTableLike(Hive db, CreateTableLikeDesc crtTbl) throws Exceptio if (crtTbl.isExternal()) { tbl.setProperty("EXTERNAL", "TRUE"); tbl.setTableType(TableType.EXTERNAL_TABLE); + // partition discovery is on by default + tbl.setProperty(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true"); } else { tbl.getParameters().remove("EXTERNAL"); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java index 5fa5e9e528..63a591e2ea 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java @@ -307,32 +307,28 @@ private JSONObject getLocks(PrintStream out, ExplainWork work) { if (jsonOutput) { out = null; } - if (work.getParseContext() != null) { - List lockComponents = AcidUtils.makeLockComponents(work.getOutputs(), work.getInputs(), conf); - if (null != out) { - out.print("LOCK INFORMATION:\n"); - } - List locks = new ArrayList<>(lockComponents.size()); - - for (LockComponent component : lockComponents) { - ExplainLockDesc lockDesc = new ExplainLockDesc(component); + List lockComponents = AcidUtils.makeLockComponents(work.getOutputs(), work.getInputs(), conf); + if (null != out) { + out.print("LOCK INFORMATION:\n"); + } + List locks = new ArrayList<>(lockComponents.size()); - if (null != out) { - out.print(lockDesc.getFullName()); - out.print(" -> "); - out.print(lockDesc.getLockType()); - out.print('\n'); - } else { - locks.add(lockDesc); - } + for (LockComponent component : lockComponents) { + ExplainLockDesc lockDesc = new ExplainLockDesc(component); + if (null != out) { + out.print(lockDesc.getFullName()); + out.print(" -> "); + out.print(lockDesc.getLockType()); + out.print('\n'); + } else { + locks.add(lockDesc); } - if (jsonOutput) { - jsonObject.put("LOCK INFORMATION:", locks); - } - } else { - System.err.println("No parse context!"); + } + + if (jsonOutput) { + jsonObject.put("LOCK INFORMATION:", locks); } return jsonObject; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/api/MetastoreException.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/api/MetastoreException.java new file mode 100644 index 0000000000..ab89389981 --- /dev/null +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/api/MetastoreException.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.api; + +public class MetastoreException extends Exception { + public MetastoreException() { + super(); + } + + public MetastoreException(String message) { + super(message); + } + + public MetastoreException(Throwable cause) { + super(cause); + } + + public MetastoreException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java index f7eb711fb7..9dafe08b64 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java @@ -32,6 +32,7 @@ import java.util.Map.Entry; import java.util.Properties; import java.util.Set; +import java.util.stream.Collectors; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -39,6 +40,7 @@ import org.apache.hadoop.hive.common.StringInternUtils; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.Context; import org.apache.hadoop.hive.ql.exec.ColumnInfo; @@ -1320,7 +1322,7 @@ public static void createMRWorkForMergingFiles(FileSinkOperator fsInput, } // update the FileSinkOperator to include partition columns - usePartitionColumns(fsInputDesc.getTableInfo().getProperties(), dpCtx.getDPColNames()); + usePartitionColumns(fsInputDesc.getTableInfo().getProperties(), fsInputDesc.getTable(), dpCtx.getDPColNames()); } else { // non-partitioned table fsInputDesc.getTableInfo().getProperties().remove( @@ -2086,6 +2088,23 @@ public static String findAlias(MapWork work, Operator operator) { } return null; } + + static void usePartitionColumns(Properties properties, Table table, List partColNames) { + if (properties.containsKey(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS)) { + usePartitionColumns(properties, partColNames); + } else { + List partCols = table.getPartCols(); + String partNames = partCols.stream().map(FieldSchema::getName).collect(Collectors.joining("/")); + String partTypes = partCols.stream().map(FieldSchema::getType).collect(Collectors.joining(":")); + properties.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS, + partNames); + properties.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES, + partTypes); + } + } + /** * Uses only specified partition columns. * Provided properties should be pre-populated with partition column names and types. 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java index b14648ab24..b94b66ad27 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java @@ -3775,7 +3775,11 @@ private void analyzeMetastoreCheck(CommonTree ast) throws SemanticException { } Table tab = getTable(tableName); List<Map<String, String>> specs = getPartitionSpecs(tab, ast); - outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED)); + if (repair && AcidUtils.isTransactionalTable(tab)) { + outputs.add(new WriteEntity(tab, WriteType.DDL_EXCLUSIVE)); + } else { + outputs.add(new WriteEntity(tab, WriteEntity.WriteType.DDL_SHARED)); + } MsckDesc checkDesc = new MsckDesc(tableName, specs, ctx.getResFile(), repair, addPartitions, dropPartitions); rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java index 0fadf1b61f..a1843a66e1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/CreateTableDesc.java @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.PartitionManagementTask; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.Order; @@ -836,6 +837,11 @@ public Table toTable(HiveConf conf) throws HiveException { if (isExternal()) { tbl.setProperty("EXTERNAL", "TRUE"); tbl.setTableType(TableType.EXTERNAL_TABLE); + // only add if the user has not explicitly set it (for example, the user explicitly disabled it, in which case don't flip it) + if (tbl.getProperty(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY) == null) { + // partition discovery is on by default if undefined + tbl.setProperty(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true"); + } } // If the sorted columns is a superset of bucketed columns, store this fact.
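The CreateTableDesc change above only defaults partition discovery when the property is absent, so an explicit 'discover.partitions'='false' supplied by the user is left alone. A standalone sketch of that default-unless-set behaviour; the literal key used here is an assumption standing in for PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY (the .q.out updates earlier in the patch show it rendered as discover.partitions):

import java.util.HashMap;
import java.util.Map;

public class DiscoverPartitionsDefaultSketch {
  // Assumed to match PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY.
  static final String DISCOVER_PARTITIONS = "discover.partitions";

  /** Mirrors the external-table default applied in CreateTableDesc.toTable and DDLTask.createTableLike. */
  static void defaultPartitionDiscovery(Map<String, String> tblProps) {
    // Only turn discovery on when the user has not already chosen a value.
    tblProps.putIfAbsent(DISCOVER_PARTITIONS, "true");
  }

  public static void main(String[] args) {
    Map<String, String> userProps = new HashMap<>();
    userProps.put(DISCOVER_PARTITIONS, "false"); // explicit user choice survives
    defaultPartitionDiscovery(userProps);
    System.out.println(userProps); // {discover.partitions=false}
  }
}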
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java index ce2b186b4d..3e45016450 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckCreatePartitionsInBatches.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hive.ql.exec; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.util.ArrayList; @@ -27,17 +29,23 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.CheckResult.PartitionResult; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.Msck; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.utils.RetryUtilities; import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; -import org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult; -import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Table; -import org.apache.hadoop.hive.ql.plan.AddPartitionDesc; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.util.StringUtils; -import org.apache.hive.common.util.RetryUtilities.RetryException; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -48,42 +56,61 @@ public class TestMsckCreatePartitionsInBatches { private static HiveConf hiveConf; - private static DDLTask ddlTask; + private static Msck msck; + private final String catName = "hive"; + private final String dbName = "default"; private final String tableName = "test_msck_batch"; - private static Hive db; + private static IMetaStoreClient db; private List repairOutput; private Table table; @BeforeClass - public static void setupClass() throws HiveException { + public static void setupClass() throws HiveException, MetaException { hiveConf = new HiveConf(TestMsckCreatePartitionsInBatches.class); hiveConf.setIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE, 5); hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); SessionState.start(hiveConf); - db = Hive.get(hiveConf); - ddlTask = new DDLTask(); + try { + db = new HiveMetaStoreClient(hiveConf); + } catch (MetaException e) { + throw new HiveException(e); + } + msck = new Msck( false, false); + msck.init(hiveConf); } @Before public void before() throws Exception { - createPartitionedTable("default", tableName); - table = db.getTable(tableName); + createPartitionedTable(catName, dbName, tableName); + table = db.getTable(catName, dbName, tableName); repairOutput = new ArrayList(); } @After public void after() throws Exception { - cleanUpTableQuietly("default", tableName); + cleanUpTableQuietly(catName, 
dbName, tableName); } - private Table createPartitionedTable(String dbName, String tableName) throws Exception { + private Table createPartitionedTable(String catName, String dbName, String tableName) throws Exception { try { - db.dropTable(dbName, tableName); - db.createTable(tableName, Arrays.asList("key", "value"), // Data columns. - Arrays.asList("city"), // Partition columns. - TextInputFormat.class, HiveIgnoreKeyTextOutputFormat.class); - return db.getTable(dbName, tableName); + db.dropTable(catName, dbName, tableName); + Table table = new Table(); + table.setCatName(catName); + table.setDbName(dbName); + table.setTableName(tableName); + FieldSchema col1 = new FieldSchema("key", "string", ""); + FieldSchema col2 = new FieldSchema("value", "int", ""); + FieldSchema col3 = new FieldSchema("city", "string", ""); + StorageDescriptor sd = new StorageDescriptor(); + sd.setSerdeInfo(new SerDeInfo()); + sd.setInputFormat(TextInputFormat.class.getCanonicalName()); + sd.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); + sd.setCols(Arrays.asList(col1, col2)); + table.setPartitionKeys(Arrays.asList(col3)); + table.setSd(sd); + db.createTable(table); + return db.getTable(catName, dbName, tableName); } catch (Exception exception) { fail("Unable to drop and create table " + StatsUtils.getFullyQualifiedTableName(dbName, tableName) + " because " + StringUtils.stringifyException(exception)); @@ -91,9 +118,9 @@ private Table createPartitionedTable(String dbName, String tableName) throws Exc } } - private void cleanUpTableQuietly(String dbName, String tableName) { + private void cleanUpTableQuietly(String catName, String dbName, String tableName) { try { - db.dropTable(dbName, tableName, true, true, true); + db.dropTable(catName, dbName, tableName); } catch (Exception exception) { fail("Unexpected exception: " + StringUtils.stringifyException(exception)); } @@ -119,19 +146,23 @@ private void cleanUpTableQuietly(String dbName, String tableName) { public void testNumberOfCreatePartitionCalls() throws Exception { // create 10 dummy partitions Set partsNotInMs = createPartsNotInMs(10); - Hive spyDb = Mockito.spy(db); + IMetaStoreClient spyDb = Mockito.spy(db); // batch size of 5 and decaying factor of 2 - ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 5, 2, 0); + msck.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 5, 2, 0); // there should be 2 calls to create partitions with each batch size of 5 - ArgumentCaptor argument = ArgumentCaptor.forClass(AddPartitionDesc.class); - Mockito.verify(spyDb, Mockito.times(2)).createPartitions(argument.capture()); + ArgumentCaptor ifNotExistsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor needResultsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor> argParts = ArgumentCaptor.forClass((Class) List.class); + Mockito.verify(spyDb, Mockito.times(2)).add_partitions(argParts.capture(), ifNotExistsArg.capture(), needResultsArg.capture()); // confirm the batch sizes were 5, 5 in the two calls to create partitions - List apds = argument.getAllValues(); + List> apds = argParts.getAllValues(); int retryAttempt = 1; Assert.assertEquals(String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), - 5, apds.get(0).getPartitionCount()); + 5, apds.get(0).size()); Assert.assertEquals(String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), - 5, apds.get(1).getPartitionCount()); + 5, apds.get(1).size()); + assertTrue(ifNotExistsArg.getValue()); + 
assertFalse(needResultsArg.getValue()); } /** @@ -144,19 +175,23 @@ public void testNumberOfCreatePartitionCalls() throws Exception { public void testUnevenNumberOfCreatePartitionCalls() throws Exception { // create 9 dummy partitions Set partsNotInMs = createPartsNotInMs(9); - Hive spyDb = Mockito.spy(db); + IMetaStoreClient spyDb = Mockito.spy(db); // batch size of 5 and decaying factor of 2 - ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 5, 2, 0); + msck.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 5, 2, 0); // there should be 2 calls to create partitions with batch sizes of 5, 4 - ArgumentCaptor argument = ArgumentCaptor.forClass(AddPartitionDesc.class); - Mockito.verify(spyDb, Mockito.times(2)).createPartitions(argument.capture()); + ArgumentCaptor ifNotExistsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor needResultsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor> argParts = ArgumentCaptor.forClass((Class) List.class); + Mockito.verify(spyDb, Mockito.times(2)).add_partitions(argParts.capture(), ifNotExistsArg.capture(), needResultsArg.capture()); // confirm the batch sizes were 5, 4 in the two calls to create partitions - List apds = argument.getAllValues(); + List> apds = argParts.getAllValues(); int retryAttempt = 1; Assert.assertEquals(String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), - 5, apds.get(0).getPartitionCount()); + 5, apds.get(0).size()); Assert.assertEquals(String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), - 4, apds.get(1).getPartitionCount()); + 4, apds.get(1).size()); + assertTrue(ifNotExistsArg.getValue()); + assertFalse(needResultsArg.getValue()); } /** @@ -169,14 +204,20 @@ public void testUnevenNumberOfCreatePartitionCalls() throws Exception { public void testEqualNumberOfPartitions() throws Exception { // create 13 dummy partitions Set partsNotInMs = createPartsNotInMs(13); - Hive spyDb = Mockito.spy(db); + IMetaStoreClient spyDb = Mockito.spy(db); // batch size of 13 and decaying factor of 2 - ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 13, 2, 0); + msck.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 13, 2, 0); + // there should be 1 call to create partitions with batch sizes of 13 + ArgumentCaptor ifNotExistsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor needResultsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor> argParts = ArgumentCaptor.forClass((Class) List.class); // there should be 1 call to create partitions with batch sizes of 13 - ArgumentCaptor argument = ArgumentCaptor.forClass(AddPartitionDesc.class); - Mockito.verify(spyDb, Mockito.times(1)).createPartitions(argument.capture()); + Mockito.verify(spyDb, Mockito.times(1)).add_partitions(argParts.capture(), ifNotExistsArg.capture(), + needResultsArg.capture()); Assert.assertEquals("Unexpected number of batch size", 13, - argument.getValue().getPartitionCount()); + argParts.getValue().size()); + assertTrue(ifNotExistsArg.getValue()); + assertFalse(needResultsArg.getValue()); } /** @@ -189,15 +230,22 @@ public void testEqualNumberOfPartitions() throws Exception { public void testSmallNumberOfPartitions() throws Exception { // create 10 dummy partitions Set partsNotInMs = createPartsNotInMs(10); - Hive spyDb = Mockito.spy(db); + IMetaStoreClient spyDb = Mockito.spy(db); // batch size of 20 and decaying factor of 2 - ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, 
table, 20, 2, 0); + msck.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 20, 2, 0); // there should be 1 call to create partitions with batch sizes of 10 - Mockito.verify(spyDb, Mockito.times(1)).createPartitions(Mockito.anyObject()); - ArgumentCaptor argument = ArgumentCaptor.forClass(AddPartitionDesc.class); - Mockito.verify(spyDb).createPartitions(argument.capture()); + Mockito.verify(spyDb, Mockito.times(1)).add_partitions(Mockito.anyObject(), Mockito.anyBoolean(), + Mockito.anyBoolean()); + ArgumentCaptor ifNotExistsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor needResultsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor> argParts = ArgumentCaptor.forClass((Class) List.class); + // there should be 1 call to create partitions with batch sizes of 10 + Mockito.verify(spyDb, Mockito.times(1)).add_partitions(argParts.capture(), ifNotExistsArg.capture(), + needResultsArg.capture()); Assert.assertEquals("Unexpected number of batch size", 10, - argument.getValue().getPartitionCount()); + argParts.getValue().size()); + assertTrue(ifNotExistsArg.getValue()); + assertFalse(needResultsArg.getValue()); } /** @@ -210,28 +258,34 @@ public void testSmallNumberOfPartitions() throws Exception { public void testBatchingWhenException() throws Exception { // create 13 dummy partitions Set partsNotInMs = createPartsNotInMs(23); - Hive spyDb = Mockito.spy(db); + IMetaStoreClient spyDb = Mockito.spy(db); // first call to createPartitions should throw exception Mockito.doThrow(HiveException.class).doCallRealMethod().doCallRealMethod().when(spyDb) - .createPartitions(Mockito.any(AddPartitionDesc.class)); + .add_partitions(Mockito.anyObject(), Mockito.anyBoolean(), + Mockito.anyBoolean()); // test with a batch size of 30 and decaying factor of 2 - ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 30, 2, 0); + msck.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 30, 2, 0); // confirm the batch sizes were 23, 15, 8 in the three calls to create partitions - ArgumentCaptor argument = ArgumentCaptor.forClass(AddPartitionDesc.class); + ArgumentCaptor ifNotExistsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor needResultsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor> argParts = ArgumentCaptor.forClass((Class) List.class); // there should be 3 calls to create partitions with batch sizes of 23, 15, 8 - Mockito.verify(spyDb, Mockito.times(3)).createPartitions(argument.capture()); - List apds = argument.getAllValues(); + Mockito.verify(spyDb, Mockito.times(3)).add_partitions(argParts.capture(), ifNotExistsArg.capture(), + needResultsArg.capture()); + List> apds = argParts.getAllValues(); int retryAttempt = 1; Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 23, - apds.get(0).getPartitionCount()); + apds.get(0).size()); Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 15, - apds.get(1).getPartitionCount()); + apds.get(1).size()); Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 8, - apds.get(2).getPartitionCount()); + apds.get(2).size()); + assertTrue(ifNotExistsArg.getValue()); + assertFalse(needResultsArg.getValue()); } /** @@ -244,38 +298,44 @@ public void testBatchingWhenException() throws Exception { @Test public void testRetriesExhaustedBatchSize() throws Exception { Set partsNotInMs = createPartsNotInMs(17); - Hive spyDb = Mockito.spy(db); + 
IMetaStoreClient spyDb = Mockito.spy(db); Mockito.doThrow(HiveException.class).when(spyDb) - .createPartitions(Mockito.any(AddPartitionDesc.class)); + .add_partitions(Mockito.anyObject(), Mockito.anyBoolean(), Mockito.anyBoolean()); // batch size of 5 and decaying factor of 2 Exception ex = null; try { - ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 30, 2, 0); + msck.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 30, 2, 0); } catch (Exception retryEx) { ex = retryEx; } - Assert.assertFalse("Exception was expected but was not thrown", ex == null); - Assert.assertTrue("Unexpected class of exception thrown", ex instanceof RetryException); + assertFalse("Exception was expected but was not thrown", ex == null); + Assert.assertTrue("Unexpected class of exception thrown", ex instanceof RetryUtilities.RetryException); + // there should be 5 calls to create partitions with batch sizes of 17, 15, 7, 3, 1 + ArgumentCaptor ifNotExistsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor needResultsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor> argParts = ArgumentCaptor.forClass((Class) List.class); // there should be 5 calls to create partitions with batch sizes of 17, 15, 7, 3, 1 - ArgumentCaptor argument = ArgumentCaptor.forClass(AddPartitionDesc.class); - Mockito.verify(spyDb, Mockito.times(5)).createPartitions(argument.capture()); - List apds = argument.getAllValues(); + Mockito.verify(spyDb, Mockito.times(5)).add_partitions(argParts.capture(), ifNotExistsArg.capture(), + needResultsArg.capture()); + List> apds = argParts.getAllValues(); int retryAttempt = 1; Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 17, - apds.get(0).getPartitionCount()); + apds.get(0).size()); Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 15, - apds.get(1).getPartitionCount()); + apds.get(1).size()); Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 7, - apds.get(2).getPartitionCount()); + apds.get(2).size()); Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 3, - apds.get(3).getPartitionCount()); + apds.get(3).size()); Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 1, - apds.get(4).getPartitionCount()); + apds.get(4).size()); + assertTrue(ifNotExistsArg.getValue()); + assertFalse(needResultsArg.getValue()); } /** @@ -285,28 +345,32 @@ public void testRetriesExhaustedBatchSize() throws Exception { @Test public void testMaxRetriesReached() throws Exception { Set partsNotInMs = createPartsNotInMs(17); - Hive spyDb = Mockito.spy(db); + IMetaStoreClient spyDb = Mockito.spy(db); Mockito.doThrow(HiveException.class).when(spyDb) - .createPartitions(Mockito.any(AddPartitionDesc.class)); + .add_partitions(Mockito.anyObject(), Mockito.anyBoolean(), Mockito.anyBoolean()); // batch size of 5 and decaying factor of 2 Exception ex = null; try { - ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 30, 2, 2); + msck.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 30, 2, 2); } catch (Exception retryEx) { ex = retryEx; } - Assert.assertFalse("Exception was expected but was not thrown", ex == null); - Assert.assertTrue("Unexpected class of exception thrown", ex instanceof RetryException); - ArgumentCaptor argument = ArgumentCaptor.forClass(AddPartitionDesc.class); - 
Mockito.verify(spyDb, Mockito.times(2)).createPartitions(argument.capture()); - List apds = argument.getAllValues(); + assertFalse("Exception was expected but was not thrown", ex == null); + Assert.assertTrue("Unexpected class of exception thrown", ex instanceof RetryUtilities.RetryException); + ArgumentCaptor ifNotExistsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor needResultsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor> argParts = ArgumentCaptor.forClass((Class) List.class); + Mockito.verify(spyDb, Mockito.times(2)).add_partitions(argParts.capture(), ifNotExistsArg.capture(), needResultsArg.capture()); + List> apds = argParts.getAllValues(); int retryAttempt = 1; Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 17, - apds.get(0).getPartitionCount()); + apds.get(0).size()); Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 15, - apds.get(1).getPartitionCount()); + apds.get(1).size()); + assertTrue(ifNotExistsArg.getValue()); + assertFalse(needResultsArg.getValue()); } /** @@ -317,25 +381,31 @@ public void testMaxRetriesReached() throws Exception { @Test public void testOneMaxRetries() throws Exception { Set partsNotInMs = createPartsNotInMs(17); - Hive spyDb = Mockito.spy(db); + IMetaStoreClient spyDb = Mockito.spy(db); Mockito.doThrow(HiveException.class).when(spyDb) - .createPartitions(Mockito.any(AddPartitionDesc.class)); + .add_partitions(Mockito.anyObject(), Mockito.anyBoolean(), Mockito.anyBoolean()); // batch size of 5 and decaying factor of 2 Exception ex = null; try { - ddlTask.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 30, 2, 1); + msck.createPartitionsInBatches(spyDb, repairOutput, partsNotInMs, table, 30, 2, 1); } catch (Exception retryEx) { ex = retryEx; } - Assert.assertFalse("Exception was expected but was not thrown", ex == null); - Assert.assertTrue("Unexpected class of exception thrown", ex instanceof RetryException); + assertFalse("Exception was expected but was not thrown", ex == null); + Assert.assertTrue("Unexpected class of exception thrown", ex instanceof RetryUtilities.RetryException); + // there should be 5 calls to create partitions with batch sizes of 17, 15, 7, 3, 1 + ArgumentCaptor ifNotExistsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor needResultsArg = ArgumentCaptor.forClass(Boolean.class); + ArgumentCaptor> argParts = ArgumentCaptor.forClass((Class) List.class); // there should be 5 calls to create partitions with batch sizes of 17, 15, 7, 3, 1 - ArgumentCaptor argument = ArgumentCaptor.forClass(AddPartitionDesc.class); - Mockito.verify(spyDb, Mockito.times(1)).createPartitions(argument.capture()); - List apds = argument.getAllValues(); + Mockito.verify(spyDb, Mockito.times(1)).add_partitions(argParts.capture(), ifNotExistsArg.capture(), + needResultsArg.capture()); + List> apds = argParts.getAllValues(); int retryAttempt = 1; Assert.assertEquals( String.format("Unexpected batch size in retry attempt %d ", retryAttempt++), 17, - apds.get(0).getPartitionCount()); + apds.get(0).size()); + assertTrue(ifNotExistsArg.getValue()); + assertFalse(needResultsArg.getValue()); } } diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java index 9480d38d7c..1ec46364f1 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java +++ 
b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestMsckDropPartitionsInBatches.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.exec; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.fail; import java.util.ArrayList; @@ -27,16 +28,22 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; +import org.apache.hadoop.hive.metastore.CheckResult.PartitionResult; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; +import org.apache.hadoop.hive.metastore.Msck; +import org.apache.hadoop.hive.metastore.PartitionDropOptions; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.MetastoreException; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.utils.RetryUtilities; import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; -import org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult; -import org.apache.hadoop.hive.ql.metadata.Hive; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.stats.StatsUtils; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.hadoop.util.StringUtils; -import org.apache.hive.common.util.RetryUtilities.RetryException; import org.junit.After; import org.junit.Assert; import org.junit.Before; @@ -47,57 +54,71 @@ /** * Unit test for function dropPartitionsInBatches in DDLTask. - * **/ public class TestMsckDropPartitionsInBatches { private static HiveConf hiveConf; - private static DDLTask ddlTask; + private static Msck msck; + private final String catName = "hive"; + private final String dbName = "default"; private final String tableName = "test_msck_batch"; - private static Hive db; + private static IMetaStoreClient db; private List repairOutput; private Table table; @BeforeClass - public static void setupClass() throws HiveException { + public static void setupClass() throws Exception { hiveConf = new HiveConf(TestMsckCreatePartitionsInBatches.class); hiveConf.setIntVar(ConfVars.HIVE_MSCK_REPAIR_BATCH_SIZE, 5); hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, - "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); + "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); SessionState.start(hiveConf); - db = Hive.get(hiveConf); - ddlTask = new DDLTask(); + db = new HiveMetaStoreClient(hiveConf); + msck = new Msck( false, false); + msck.init(hiveConf); } @Before public void before() throws Exception { - createPartitionedTable("default", tableName); - table = db.getTable(tableName); + createPartitionedTable(catName, dbName, tableName); + table = db.getTable(catName, dbName, tableName); repairOutput = new ArrayList(); } @After public void after() throws Exception { - cleanUpTableQuietly("default", tableName); + cleanUpTableQuietly(catName, dbName, tableName); } - private Table createPartitionedTable(String dbName, String tableName) throws Exception { + private Table createPartitionedTable(String catName, String dbName, String tableName) throws Exception { try { - db.dropTable(dbName, tableName); - db.createTable(tableName, Arrays.asList("key", "value"), 
// Data columns. - Arrays.asList("city"), // Partition columns. - TextInputFormat.class, HiveIgnoreKeyTextOutputFormat.class); - return db.getTable(dbName, tableName); + db.dropTable(catName, dbName, tableName); + Table table = new Table(); + table.setCatName(catName); + table.setDbName(dbName); + table.setTableName(tableName); + FieldSchema col1 = new FieldSchema("key", "string", ""); + FieldSchema col2 = new FieldSchema("value", "int", ""); + FieldSchema col3 = new FieldSchema("city", "string", ""); + StorageDescriptor sd = new StorageDescriptor(); + sd.setSerdeInfo(new SerDeInfo()); + sd.setInputFormat(TextInputFormat.class.getCanonicalName()); + sd.setOutputFormat(HiveIgnoreKeyTextOutputFormat.class.getCanonicalName()); + sd.setCols(Arrays.asList(col1, col2)); + table.setPartitionKeys(Arrays.asList(col3)); + table.setSd(sd); + db.createTable(table); + return db.getTable(catName, dbName, tableName); } catch (Exception exception) { fail("Unable to drop and create table " + StatsUtils - .getFullyQualifiedTableName(dbName, tableName) + " because " + StringUtils - .stringifyException(exception)); + .getFullyQualifiedTableName(dbName, tableName) + " because " + StringUtils + .stringifyException(exception)); throw exception; } } - private void cleanUpTableQuietly(String dbName, String tableName) { + private void cleanUpTableQuietly(String catName, String dbName, String tableName) { try { - db.dropTable(dbName, tableName, true, true, true); + db.dropTable(catName, dbName, tableName, true, true, true); } catch (Exception exception) { fail("Unexpected exception: " + StringUtils.stringifyException(exception)); } @@ -142,9 +163,10 @@ private int findMSB(int n) { private final int noException = 1; private final int oneException = 2; private final int allException = 3; + private void runDropPartitions(int partCount, int batchSize, int maxRetries, int exceptionStatus) - throws Exception { - Hive spyDb = Mockito.spy(db); + throws Exception { + IMetaStoreClient spyDb = Mockito.spy(db); // create partCount dummy partitions Set partsNotInFs = dropPartsNotInFs(partCount); @@ -163,13 +185,13 @@ private void runDropPartitions(int partCount, int batchSize, int maxRetries, int if (exceptionStatus == oneException) { // After one exception everything is expected to run - actualBatchSize = batchSize/2; + actualBatchSize = batchSize / 2; } if (exceptionStatus != allException) { - expectedCallCount = partCount/actualBatchSize; + expectedCallCount = partCount / actualBatchSize; - if (expectedCallCount*actualBatchSize < partCount) { + if (expectedCallCount * actualBatchSize < partCount) { // partCount not equally divided into batches. 
last batch size will be less than batch size lastBatchSize = partCount - (expectedCallCount * actualBatchSize); @@ -182,9 +204,10 @@ private void runDropPartitions(int partCount, int batchSize, int maxRetries, int expectedCallCount++; // only first call throws exception - Mockito.doThrow(HiveException.class).doCallRealMethod().doCallRealMethod().when(spyDb) - .dropPartitions(Mockito.eq(table), Mockito.any(List.class), Mockito.eq(false), - Mockito.eq(true)); + Mockito.doThrow(MetastoreException.class).doCallRealMethod().doCallRealMethod().when(spyDb) + .dropPartitions(Mockito.eq(table.getCatName()), Mockito.eq(table.getDbName()), + Mockito.eq(table.getTableName()), + Mockito.any(List.class), Mockito.any(PartitionDropOptions.class)); } expectedBatchSizes = new int[expectedCallCount]; @@ -195,15 +218,15 @@ private void runDropPartitions(int partCount, int batchSize, int maxRetries, int // second batch to last but one batch will be actualBatchSize // actualBatchSize is same as batchSize when no exceptions are expected // actualBatchSize is half of batchSize when 1 exception is expected - for (int i = 1; i < expectedCallCount-1; i++) { + for (int i = 1; i < expectedCallCount - 1; i++) { expectedBatchSizes[i] = Integer.min(partCount, actualBatchSize); } - expectedBatchSizes[expectedCallCount-1] = lastBatchSize; + expectedBatchSizes[expectedCallCount - 1] = lastBatchSize; // batch size from input and decaying factor of 2 - ddlTask.dropPartitionsInBatches(spyDb, repairOutput, partsNotInFs, table, batchSize, 2, - maxRetries); + msck.dropPartitionsInBatches(spyDb, repairOutput, partsNotInFs, null, table, batchSize, 2, + maxRetries); } else { if (maxRetries == 0) { // Retries will be done till decaying factor reduces to 0. Decaying Factor is 2. @@ -219,35 +242,37 @@ private void runDropPartitions(int partCount, int batchSize, int maxRetries, int expectedBatchSizes[i] = Integer.min(partCount, actualBatchSize); } // all calls fail - Mockito.doThrow(HiveException.class).when(spyDb) - .dropPartitions(Mockito.eq(table), Mockito.any(List.class), Mockito.eq(false), - Mockito.eq(true)); + Mockito.doThrow(MetastoreException.class).when(spyDb) + .dropPartitions(Mockito.eq(table.getCatName()), Mockito.eq(table.getDbName()), Mockito.eq(table.getTableName()), + Mockito.any(List.class), Mockito.any(PartitionDropOptions.class)); Exception ex = null; try { - ddlTask.dropPartitionsInBatches(spyDb, repairOutput, partsNotInFs, table, batchSize, 2, - maxRetries); + msck.dropPartitionsInBatches(spyDb, repairOutput, partsNotInFs, null, table, batchSize, 2, + maxRetries); } catch (Exception retryEx) { ex = retryEx; } Assert.assertFalse("Exception was expected but was not thrown", ex == null); - Assert.assertTrue("Unexpected class of exception thrown", ex instanceof RetryException); + Assert.assertTrue("Unexpected class of exception thrown", ex instanceof RetryUtilities.RetryException); } // there should be expectedCallCount calls to drop partitions with each batch size of // actualBatchSize ArgumentCaptor argument = ArgumentCaptor.forClass(List.class); Mockito.verify(spyDb, Mockito.times(expectedCallCount)) - .dropPartitions(Mockito.eq(table), argument.capture(), Mockito.eq(false), Mockito.eq(true)); + .dropPartitions(Mockito.eq(table.getCatName()), Mockito.eq(table.getDbName()), Mockito.eq(table.getTableName()), + argument.capture(), Mockito.any(PartitionDropOptions.class)); // confirm the batch sizes were as expected List droppedParts = argument.getAllValues(); + assertEquals(expectedCallCount, droppedParts.size()); for 
(int i = 0; i < expectedCallCount; i++) { Assert.assertEquals( - String.format("Unexpected batch size in attempt %d. Expected: %d. Found: %d", i + 1, - expectedBatchSizes[i], droppedParts.get(i).size()), - expectedBatchSizes[i], droppedParts.get(i).size()); + String.format("Unexpected batch size in attempt %d. Expected: %d. Found: %d", i + 1, + expectedBatchSizes[i], droppedParts.get(i).size()), + expectedBatchSizes[i], droppedParts.get(i).size()); } } @@ -301,7 +326,7 @@ public void testSmallNumberOfPartitions() throws Exception { /** * Tests the number of calls to dropPartitions and the respective batch sizes when first call to - * dropPartitions throws HiveException. The batch size should be reduced once by the + * dropPartitions throws MetastoreException. The batch size should be reduced once by the * decayingFactor 2, iow after batch size is halved. * * @throws Exception @@ -313,7 +338,7 @@ public void testBatchingWhenException() throws Exception { /** * Tests the retries exhausted case when Hive.DropPartitions method call always keep throwing - * HiveException. The batch sizes should exponentially decreased based on the decaying factor and + * MetastoreException. The batch sizes should decrease exponentially based on the decaying factor and * ultimately give up when it reaches 0. * * @throws Exception @@ -325,6 +350,7 @@ public void testRetriesExhaustedBatchSize() throws Exception { /** * Tests the maximum retry attempt is set to 2. + * * @throws Exception */ @Test @@ -334,6 +360,7 @@ public void testMaxRetriesReached() throws Exception { /** * Tests when max number of retries is set to 1. + * * @throws Exception */ @Test diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java index ff411f62d5..97483a9026 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java @@ -17,7 +17,9 @@ */ package org.apache.hadoop.hive.ql.metadata; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import java.io.IOException; import java.util.ArrayList; @@ -29,11 +31,14 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.CheckResult; +import org.apache.hadoop.hive.metastore.HiveMetaStoreChecker; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.AlreadyExistsException; import org.apache.hadoop.hive.metastore.api.Database; import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.metastore.api.MetaException; -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.utils.MetastoreException; import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.mapred.TextInputFormat; @@ -51,9 +56,11 @@ public class TestHiveMetaStoreChecker { private Hive hive; + private IMetaStoreClient msc; private FileSystem fs; private HiveMetaStoreChecker checker = null; + private final String catName = "hive"; private final String dbName = "testhivemetastorechecker_db"; private final String tableName =
"testhivemetastorechecker_table"; @@ -68,7 +75,8 @@ public void setUp() throws Exception { hive = Hive.get(); hive.getConf().setIntVar(HiveConf.ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT, 15); hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, "throw"); - checker = new HiveMetaStoreChecker(hive); + msc = new HiveMetaStoreClient(hive.getConf()); + checker = new HiveMetaStoreChecker(msc, hive.getConf()); partCols = new ArrayList(); partCols.add(new FieldSchema(partDateName, serdeConstants.STRING_TYPE_NAME, "")); @@ -91,11 +99,9 @@ public void setUp() throws Exception { private void dropDbTable() { // cleanup try { - hive.dropTable(dbName, tableName, true, true); - hive.dropDatabase(dbName, true, true, true); - } catch (NoSuchObjectException e) { - // ignore - } catch (HiveException e) { + msc.dropTable(catName, dbName, tableName, true, true); + msc.dropDatabase(catName, dbName, true, true, true); + } catch (TException e) { // ignore } } @@ -107,28 +113,28 @@ public void tearDown() throws Exception { } @Test - public void testTableCheck() throws HiveException, MetaException, - IOException, TException, AlreadyExistsException { + public void testTableCheck() throws HiveException, IOException, TException, MetastoreException { CheckResult result = new CheckResult(); - checker.checkMetastore(dbName, null, null, result); + checker.checkMetastore(catName, dbName, null, null, result); // we haven't added anything so should return an all ok assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); // check table only, should not exist in ms result = new CheckResult(); - checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(1, result.getTablesNotInMs().size()); assertEquals(tableName, result.getTablesNotInMs().iterator().next()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); Database db = new Database(); + db.setCatalogName(catName); db.setName(dbName); - hive.createDatabase(db); + msc.createDatabase(db); Table table = new Table(dbName, tableName); table.setDbName(dbName); @@ -139,19 +145,19 @@ public void testTableCheck() throws HiveException, MetaException, // now we've got a table, check that it works // first check all (1) tables result = new CheckResult(); - checker.checkMetastore(dbName, null, null, result); + checker.checkMetastore(catName, dbName, null, null, result); assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); // then let's check the one we know about result = new CheckResult(); - 
checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); // remove the table folder fs = table.getPath().getFileSystem(hive.getConf()); @@ -159,12 +165,12 @@ public void testTableCheck() throws HiveException, MetaException, // now this shouldn't find the path on the fs result = new CheckResult(); - checker.checkMetastore(dbName, tableName, null, result); - assertEquals(Collections.emptySet(), result.getTablesNotInMs());; + checker.checkMetastore(catName, dbName, tableName, null, result); + assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(1, result.getTablesNotOnFs().size()); assertEquals(tableName, result.getTablesNotOnFs().iterator().next()); - assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); // put it back and one additional table fs.mkdirs(table.getPath()); @@ -175,12 +181,12 @@ public void testTableCheck() throws HiveException, MetaException, // find the extra table result = new CheckResult(); - checker.checkMetastore(dbName, null, null, result); + checker.checkMetastore(catName, dbName, null, null, result); assertEquals(1, result.getTablesNotInMs().size()); assertEquals(fakeTable.getName(), Lists.newArrayList(result.getTablesNotInMs()).get(0)); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); // create a new external table hive.dropTable(dbName, tableName); @@ -189,11 +195,11 @@ public void testTableCheck() throws HiveException, MetaException, // should return all ok result = new CheckResult(); - checker.checkMetastore(dbName, null, null, result); + checker.checkMetastore(catName, dbName, null, null, result); assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); } /* @@ -202,7 +208,7 @@ public void testTableCheck() throws HiveException, MetaException, */ @Test public void testAdditionalPartitionDirs() - throws HiveException, AlreadyExistsException, IOException { + throws HiveException, AlreadyExistsException, IOException, MetastoreException { Table table = createTestTable(); List partitions = hive.getPartitions(table); assertEquals(2, partitions.size()); @@ -213,16 +219,17 @@ public void testAdditionalPartitionDirs() fs.mkdirs(fakePart); fs.deleteOnExit(fakePart); CheckResult result = new CheckResult(); - 
checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(Collections. emptySet(), result.getTablesNotInMs()); assertEquals(Collections. emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections. emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections. emptySet(), result.getPartitionsNotOnFs()); //fakePart path partition is added since the defined partition keys are valid assertEquals(1, result.getPartitionsNotInMs().size()); } - @Test(expected = HiveException.class) - public void testInvalidPartitionKeyName() throws HiveException, AlreadyExistsException, IOException { + @Test(expected = MetastoreException.class) + public void testInvalidPartitionKeyName() + throws HiveException, AlreadyExistsException, IOException, MetastoreException { Table table = createTestTable(); List partitions = hive.getPartitions(table); assertEquals(2, partitions.size()); @@ -232,7 +239,7 @@ public void testInvalidPartitionKeyName() throws HiveException, AlreadyExistsExc "fakedate=2009-01-01/fakecity=sanjose"); fs.mkdirs(fakePart); fs.deleteOnExit(fakePart); - checker.checkMetastore(dbName, tableName, null, new CheckResult()); + checker.checkMetastore(catName, dbName, tableName, null, new CheckResult()); } /* @@ -241,9 +248,9 @@ public void testInvalidPartitionKeyName() throws HiveException, AlreadyExistsExc */ @Test public void testSkipInvalidPartitionKeyName() - throws HiveException, AlreadyExistsException, IOException { + throws HiveException, AlreadyExistsException, IOException, MetastoreException { hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, "skip"); - checker = new HiveMetaStoreChecker(hive); + checker = new HiveMetaStoreChecker(msc, hive.getConf()); Table table = createTestTable(); List partitions = hive.getPartitions(table); assertEquals(2, partitions.size()); @@ -255,18 +262,18 @@ public void testSkipInvalidPartitionKeyName() fs.deleteOnExit(fakePart); createPartitionsDirectoriesOnFS(table, 2); CheckResult result = new CheckResult(); - checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(Collections. emptySet(), result.getTablesNotInMs()); assertEquals(Collections. emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections. emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections. 
emptySet(), result.getPartitionsNotOnFs()); // only 2 valid partitions should be added assertEquals(2, result.getPartitionsNotInMs().size()); } - private Table createTestTable() throws AlreadyExistsException, HiveException { + private Table createTestTable() throws HiveException, AlreadyExistsException { Database db = new Database(); db.setName(dbName); - hive.createDatabase(db); + hive.createDatabase(db, true); Table table = new Table(dbName, tableName); table.setDbName(dbName); @@ -284,17 +291,17 @@ private Table createTestTable() throws AlreadyExistsException, HiveException { } @Test - public void testPartitionsCheck() throws HiveException, MetaException, - IOException, TException, AlreadyExistsException { + public void testPartitionsCheck() throws HiveException, + IOException, TException, MetastoreException { Table table = createTestTable(); CheckResult result = new CheckResult(); - checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); // all is well assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); List partitions = hive.getPartitions(table); assertEquals(2, partitions.size()); @@ -308,7 +315,7 @@ public void testPartitionsCheck() throws HiveException, MetaException, fs.delete(partToRemovePath, true); result = new CheckResult(); - checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); // missing one partition on fs assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); @@ -317,17 +324,17 @@ public void testPartitionsCheck() throws HiveException, MetaException, .getPartitionName()); assertEquals(partToRemove.getTable().getTableName(), result.getPartitionsNotOnFs().iterator().next().getTableName()); - assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); List> partsCopy = new ArrayList>(); partsCopy.add(partitions.get(1).getSpec()); // check only the partition that exists, all should be well result = new CheckResult(); - checker.checkMetastore(dbName, tableName, partsCopy, result); + checker.checkMetastore(catName, dbName, tableName, partsCopy, result); assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); // old test is moved to msck_repair_2.q @@ -335,17 +342,17 @@ public void testPartitionsCheck() throws HiveException, MetaException, hive.dropTable(dbName, tableName, true, true); hive.createTable(table); result = new CheckResult(); - checker.checkMetastore(dbName, null, null, result); + checker.checkMetastore(catName, dbName, null, null, result); assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); - 
assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotInMs()); } @Test - public void testDataDeletion() throws HiveException, MetaException, - IOException, TException, AlreadyExistsException, NoSuchObjectException { + public void testDataDeletion() throws HiveException, + IOException, TException { Database db = new Database(); db.setName(dbName); @@ -381,15 +388,15 @@ public void testDataDeletion() throws HiveException, * Test multi-threaded implementation of checker to find out missing partitions */ @Test - public void testPartitionsNotInMs() throws HiveException, AlreadyExistsException, IOException { + public void testPartitionsNotInMs() throws HiveException, AlreadyExistsException, IOException, MetastoreException { Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0); // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 10); CheckResult result = new CheckResult(); - checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(Collections.emptySet(), result.getTablesNotInMs()); assertEquals(Collections.emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.emptySet(), result.getPartitionsNotOnFs()); assertEquals(10, result.getPartitionsNotInMs().size()); } @@ -398,17 +405,17 @@ public void testPartitionsNotInMs() throws HiveException, AlreadyExistsException */ @Test public void testSingleThreadedCheckMetastore() - throws HiveException, AlreadyExistsException, IOException { + throws HiveException, AlreadyExistsException, IOException, MetastoreException { // set num of threads to 0 so that single-threaded checkMetastore is called hive.getConf().setIntVar(HiveConf.ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT, 0); Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0); // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 10); CheckResult result = new CheckResult(); - checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(Collections. emptySet(), result.getTablesNotInMs()); assertEquals(Collections. emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections. emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections.
emptySet(), result.getPartitionsNotOnFs()); assertEquals(10, result.getPartitionsNotInMs().size()); } @@ -421,7 +428,7 @@ public void testSingleThreadedCheckMetastore() */ @Test public void testSingleThreadedDeeplyNestedTables() - throws HiveException, AlreadyExistsException, IOException { + throws HiveException, AlreadyExistsException, IOException, MetastoreException { // set num of threads to 0 so that single-threaded checkMetastore is called hive.getConf().setIntVar(HiveConf.ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT, 0); int poolSize = 2; @@ -430,10 +437,10 @@ public void testSingleThreadedDeeplyNestedTables() // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 10); CheckResult result = new CheckResult(); - checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(Collections. emptySet(), result.getTablesNotInMs()); assertEquals(Collections. emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections. emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections. emptySet(), result.getPartitionsNotOnFs()); assertEquals(10, result.getPartitionsNotInMs().size()); } @@ -446,7 +453,7 @@ public void testSingleThreadedDeeplyNestedTables() */ @Test public void testDeeplyNestedPartitionedTables() - throws HiveException, AlreadyExistsException, IOException { + throws HiveException, AlreadyExistsException, IOException, MetastoreException { hive.getConf().setIntVar(HiveConf.ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT, 2); int poolSize = 2; // create a deeply nested table which has more partition keys than the pool size @@ -454,10 +461,10 @@ public void testDeeplyNestedPartitionedTables() // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 10); CheckResult result = new CheckResult(); - checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(Collections. emptySet(), result.getTablesNotInMs()); assertEquals(Collections. emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections. emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections. 
emptySet(), result.getPartitionsNotOnFs()); assertEquals(10, result.getPartitionsNotInMs().size()); } @@ -482,20 +489,20 @@ public void testErrorForMissingPartitionColumn() throws AlreadyExistsException, CheckResult result = new CheckResult(); Exception exception = null; try { - checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); } catch (Exception e) { exception = e; } - assertTrue("Expected HiveException", exception!=null && exception instanceof HiveException); + assertTrue("Expected MetastoreException", exception!=null && exception instanceof MetastoreException); createFile(sb.toString(), "dummyFile"); result = new CheckResult(); exception = null; try { - checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); } catch (Exception e) { exception = e; } - assertTrue("Expected HiveException", exception!=null && exception instanceof HiveException); + assertTrue("Expected MetastoreException", exception!=null && exception instanceof MetastoreException); } /** @@ -506,14 +513,14 @@ public void testErrorForMissingPartitionColumn() throws AlreadyExistsException, * @throws HiveException * @throws IOException */ - @Test(expected = HiveException.class) + @Test(expected = MetastoreException.class) public void testInvalidOrderForPartitionKeysOnFS() - throws AlreadyExistsException, HiveException, IOException { + throws AlreadyExistsException, HiveException, IOException, MetastoreException { Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0); // add 10 partitions on the filesystem createInvalidPartitionDirsOnFS(testTable, 10); CheckResult result = new CheckResult(); - checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); } /* @@ -522,19 +529,19 @@ public void testInvalidOrderForPartitionKeysOnFS() */ @Test public void testSkipInvalidOrderForPartitionKeysOnFS() - throws AlreadyExistsException, HiveException, IOException { + throws AlreadyExistsException, HiveException, IOException, MetastoreException { hive.getConf().set(HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION.varname, "skip"); - checker = new HiveMetaStoreChecker(hive); + checker = new HiveMetaStoreChecker(msc, hive.getConf()); Table testTable = createPartitionedTestTable(dbName, tableName, 2, 0); // add 10 partitions on the filesystem createInvalidPartitionDirsOnFS(testTable, 2); // add 10 partitions on the filesystem createPartitionsDirectoriesOnFS(testTable, 2); CheckResult result = new CheckResult(); - checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); assertEquals(Collections. emptySet(), result.getTablesNotInMs()); assertEquals(Collections. emptySet(), result.getTablesNotOnFs()); - assertEquals(Collections. emptySet(), result.getPartitionsNotOnFs()); + assertEquals(Collections. 
emptySet(), result.getPartitionsNotOnFs()); // only 2 valid partitions should be added assertEquals(2, result.getPartitionsNotInMs().size()); } @@ -560,20 +567,20 @@ public void testErrorForMissingPartitionsSingleThreaded() CheckResult result = new CheckResult(); Exception exception = null; try { - checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); } catch (Exception e) { exception = e; } - assertTrue("Expected HiveException", exception!=null && exception instanceof HiveException); + assertTrue("Expected MetastoreException", exception!=null && exception instanceof MetastoreException); createFile(sb.toString(), "dummyFile"); result = new CheckResult(); exception = null; try { - checker.checkMetastore(dbName, tableName, null, result); + checker.checkMetastore(catName, dbName, tableName, null, result); } catch (Exception e) { exception = e; } - assertTrue("Expected HiveException", exception!=null && exception instanceof HiveException); + assertTrue("Expected MetastoreException", exception!=null && exception instanceof MetastoreException); } /** * Creates a test partitioned table with the required level of nested partitions and number of @@ -592,7 +599,7 @@ private Table createPartitionedTestTable(String dbName, String tableName, int nu int valuesPerPartition) throws AlreadyExistsException, HiveException { Database db = new Database(); db.setName(dbName); - hive.createDatabase(db); + hive.createDatabase(db, true); Table table = new Table(dbName, tableName); table.setDbName(dbName); @@ -606,7 +613,7 @@ private Table createPartitionedTestTable(String dbName, String tableName, int nu } table.setPartCols(partKeys); // create table - hive.createTable(table); + hive.createTable(table, true); table = hive.getTable(dbName, tableName); if (valuesPerPartition == 0) { return table; diff --git a/ql/src/test/queries/clientpositive/msck_repair_acid.q b/ql/src/test/queries/clientpositive/msck_repair_acid.q new file mode 100644 index 0000000000..369095d593 --- /dev/null +++ b/ql/src/test/queries/clientpositive/msck_repair_acid.q @@ -0,0 +1,34 @@ +set hive.msck.repair.batch.size=1; +set hive.mv.files.thread=0; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +DROP TABLE IF EXISTS repairtable_n6; + +CREATE TABLE repairtable_n6(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) STORED AS ORC tblproperties ("transactional"="true", "transactional_properties"="insert_only"); + +EXPLAIN LOCKS MSCK TABLE repairtable_n6; +MSCK TABLE repairtable_n6; + +show partitions repairtable_n6; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n6/p1=a/p2=b/; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n6/p1=c/p2=d/; +dfs -touchz ${system:test.warehouse.dir}/repairtable_n6/p1=a/p2=b/datafile; +dfs -touchz ${system:test.warehouse.dir}/repairtable_n6/p1=c/p2=d/datafile; + +EXPLAIN LOCKS MSCK REPAIR TABLE default.repairtable_n6; +MSCK REPAIR TABLE default.repairtable_n6; + +show partitions default.repairtable_n6; + +set hive.mapred.mode=strict; + +dfs -rmr ${system:test.warehouse.dir}/repairtable_n6/p1=c; + +EXPLAIN LOCKS MSCK REPAIR TABLE default.repairtable_n6 DROP PARTITIONS; +MSCK REPAIR TABLE default.repairtable_n6 DROP PARTITIONS; + +show partitions default.repairtable_n6; + +DROP TABLE default.repairtable_n6; diff --git a/ql/src/test/queries/clientpositive/partition_discovery.q b/ql/src/test/queries/clientpositive/partition_discovery.q new file 
mode 100644 index 0000000000..2f0ff87610 --- /dev/null +++ b/ql/src/test/queries/clientpositive/partition_discovery.q @@ -0,0 +1,77 @@ +set hive.msck.repair.batch.size=1; +set hive.mv.files.thread=0; + +DROP TABLE IF EXISTS repairtable_n7; +DROP TABLE IF EXISTS repairtable_n8; +DROP TABLE IF EXISTS repairtable_n9; +DROP TABLE IF EXISTS repairtable_n10; + +CREATE EXTERNAL TABLE repairtable_n7(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +LOCATION '${system:test.warehouse.dir}/repairtable_n7'; + +describe formatted repairtable_n7; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n7/p1=a/p2=b/; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n7/p1=c/p2=d/; +dfs -touchz ${system:test.warehouse.dir}/repairtable_n7/p1=a/p2=b/datafile; +dfs -touchz ${system:test.warehouse.dir}/repairtable_n7/p1=c/p2=d/datafile; + +MSCK REPAIR TABLE default.repairtable_n7; +show partitions default.repairtable_n7; + +CREATE EXTERNAL TABLE repairtable_n8 LIKE repairtable_n7 +LOCATION '${system:test.warehouse.dir}/repairtable_n8'; + +describe formatted repairtable_n8; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n8/p1=a/p2=b/; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n8/p1=c/p2=d/; +dfs -touchz ${system:test.warehouse.dir}/repairtable_n8/p1=a/p2=b/datafile; +dfs -touchz ${system:test.warehouse.dir}/repairtable_n8/p1=c/p2=d/datafile; + +MSCK REPAIR TABLE default.repairtable_n8; +show partitions default.repairtable_n8; + +CREATE EXTERNAL TABLE repairtable_n9(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +LOCATION '${system:test.warehouse.dir}/repairtable_n9' tblproperties ("partition.retention.period"="10s"); + +describe formatted repairtable_n9; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n9/p1=a/p2=b/; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n9/p1=c/p2=d/; +dfs -touchz ${system:test.warehouse.dir}/repairtable_n9/p1=a/p2=b/datafile; +dfs -touchz ${system:test.warehouse.dir}/repairtable_n9/p1=c/p2=d/datafile; + +set msck.repair.enable.partition.retention=false; +MSCK REPAIR TABLE default.repairtable_n9; +show partitions default.repairtable_n9; + +!sleep 12; + +set msck.repair.enable.partition.retention=true; +-- msck does not drop partitions, so this still should be no-op +MSCK REPAIR TABLE default.repairtable_n9; +show partitions default.repairtable_n9; + +-- this will drop old partitions +MSCK REPAIR TABLE default.repairtable_n9 SYNC PARTITIONS; +show partitions default.repairtable_n9; + +CREATE EXTERNAL TABLE repairtable_n10 PARTITIONED BY(p1,p2) STORED AS ORC AS SELECT * FROM repairtable_n9; +describe formatted repairtable_n10; + +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n10/p1=a/p2=b/; +dfs ${system:test.dfs.mkdir} ${system:test.warehouse.dir}/repairtable_n10/p1=c/p2=d/; +dfs -touchz ${system:test.warehouse.dir}/repairtable_n10/p1=a/p2=b/datafile; +dfs -touchz ${system:test.warehouse.dir}/repairtable_n10/p1=c/p2=d/datafile; + +set msck.repair.enable.partition.retention=false; +!sleep 12; +MSCK REPAIR TABLE default.repairtable_n10; +show partitions default.repairtable_n10; + + +DROP TABLE default.repairtable_n7; +DROP TABLE default.repairtable_n8; +DROP TABLE default.repairtable_n9; +DROP TABLE default.repairtable_n10; diff --git a/ql/src/test/results/clientpositive/create_like.q.out b/ql/src/test/results/clientpositive/create_like.q.out index f4a5ed55a5..6d4e14a399 100644 --- 
a/ql/src/test/results/clientpositive/create_like.q.out +++ b/ql/src/test/results/clientpositive/create_like.q.out @@ -118,6 +118,7 @@ Table Type: EXTERNAL_TABLE Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}} EXTERNAL TRUE + discover.partitions true numFiles 0 numRows 0 rawDataSize 0 diff --git a/ql/src/test/results/clientpositive/create_like_view.q.out b/ql/src/test/results/clientpositive/create_like_view.q.out index 870f2800cf..7e33e50af6 100644 --- a/ql/src/test/results/clientpositive/create_like_view.q.out +++ b/ql/src/test/results/clientpositive/create_like_view.q.out @@ -172,6 +172,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\"}} EXTERNAL TRUE bucketing_version 2 + discover.partitions true numFiles 0 numRows 0 rawDataSize 0 diff --git a/ql/src/test/results/clientpositive/default_file_format.q.out b/ql/src/test/results/clientpositive/default_file_format.q.out index 0adf5ae741..beef4192c7 100644 --- a/ql/src/test/results/clientpositive/default_file_format.q.out +++ b/ql/src/test/results/clientpositive/default_file_format.q.out @@ -172,6 +172,7 @@ Table Type: EXTERNAL_TABLE Table Parameters: EXTERNAL TRUE bucketing_version 2 + discover.partitions true #### A masked pattern was here #### # Storage Information @@ -236,6 +237,7 @@ Table Type: EXTERNAL_TABLE Table Parameters: EXTERNAL TRUE bucketing_version 2 + discover.partitions true numFiles 0 totalSize 0 #### A masked pattern was here #### @@ -472,6 +474,7 @@ Table Type: EXTERNAL_TABLE Table Parameters: EXTERNAL TRUE bucketing_version 2 + discover.partitions true numFiles 0 totalSize 0 #### A masked pattern was here #### @@ -538,6 +541,7 @@ Table Type: EXTERNAL_TABLE Table Parameters: EXTERNAL TRUE bucketing_version 2 + discover.partitions true numFiles 0 totalSize 0 #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out b/ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out index 8429ab37b3..e39c589a86 100644 --- a/ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out +++ b/ql/src/test/results/clientpositive/druid/druidkafkamini_basic.q.out @@ -350,6 +350,7 @@ STAGE PLANS: columns __time,page,user,language,added,deleted columns.comments columns.types timestamp:string:string:string:int:int + discover.partitions true druid.datasource default.druid_kafka_test druid.fieldNames language,user druid.fieldTypes string,string @@ -391,6 +392,7 @@ STAGE PLANS: columns __time,page,user,language,added,deleted columns.comments columns.types timestamp:string:string:string:int:int + discover.partitions true druid.datasource default.druid_kafka_test druid.fieldNames language,user druid.fieldTypes string,string diff --git a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out index 51470a4ad8..887ee83e36 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_expressions.q.out @@ -278,6 +278,7 @@ STAGE PLANS: columns __time,cstring1,cstring2,cdouble,cfloat,ctinyint,csmallint,cint,cbigint,cboolean1,cboolean2 columns.comments columns.types timestamp with local time zone:string:string:double:float:tinyint:smallint:int:bigint:boolean:boolean + discover.partitions true druid.datasource default.druid_table_n0 druid.fieldNames vc druid.fieldTypes int @@ -310,6 +311,7 
@@ STAGE PLANS: columns __time,cstring1,cstring2,cdouble,cfloat,ctinyint,csmallint,cint,cbigint,cboolean1,cboolean2 columns.comments columns.types timestamp with local time zone:string:string:double:float:tinyint:smallint:int:bigint:boolean:boolean + discover.partitions true druid.datasource default.druid_table_n0 druid.fieldNames vc druid.fieldTypes int diff --git a/ql/src/test/results/clientpositive/druid_topn.q.out b/ql/src/test/results/clientpositive/druid_topn.q.out index 179902a261..755e97740c 100644 --- a/ql/src/test/results/clientpositive/druid_topn.q.out +++ b/ql/src/test/results/clientpositive/druid_topn.q.out @@ -42,6 +42,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"__time\":\"true\",\"added\":\"true\",\"anonymous\":\"true\",\"count\":\"true\",\"deleted\":\"true\",\"delta\":\"true\",\"language\":\"true\",\"namespace\":\"true\",\"newpage\":\"true\",\"page\":\"true\",\"robot\":\"true\",\"unpatrolled\":\"true\",\"user\":\"true\",\"variation\":\"true\"}} EXTERNAL TRUE bucketing_version 2 + discover.partitions true druid.datasource wikipedia numFiles 0 numRows 0 diff --git a/ql/src/test/results/clientpositive/explain_locks.q.out b/ql/src/test/results/clientpositive/explain_locks.q.out index ed7f1e834f..3183533478 100644 --- a/ql/src/test/results/clientpositive/explain_locks.q.out +++ b/ql/src/test/results/clientpositive/explain_locks.q.out @@ -2,6 +2,7 @@ PREHOOK: query: explain locks drop table test_explain_locks PREHOOK: type: DROPTABLE POSTHOOK: query: explain locks drop table test_explain_locks POSTHOOK: type: DROPTABLE +LOCK INFORMATION: PREHOOK: query: explain locks create table test_explain_locks (a int, b int) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default diff --git a/ql/src/test/results/clientpositive/llap/external_table_purge.q.out b/ql/src/test/results/clientpositive/llap/external_table_purge.q.out index 24c778e0a2..4e2f6a394a 100644 --- a/ql/src/test/results/clientpositive/llap/external_table_purge.q.out +++ b/ql/src/test/results/clientpositive/llap/external_table_purge.q.out @@ -119,6 +119,7 @@ LOCATION 'hdfs://### HDFS PATH ###' TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', 'external.table.purge'='false', #### A masked pattern was here #### test.comment=Table should have data @@ -168,6 +169,7 @@ LOCATION 'hdfs://### HDFS PATH ###' TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', 'external.table.purge'='true', #### A masked pattern was here #### test.comment=Table should have data @@ -451,6 +453,7 @@ LOCATION 'hdfs://### HDFS PATH ###' TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', 'external.table.purge'='false', #### A masked pattern was here #### PREHOOK: query: alter table etp_2 add partition (p1='part1') @@ -520,6 +523,7 @@ LOCATION 'hdfs://### HDFS PATH ###' TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', 'external.table.purge'='true', #### A masked pattern was here #### PREHOOK: query: alter table etp_2 add partition (p1='part1') diff --git a/ql/src/test/results/clientpositive/llap/mm_exim.q.out b/ql/src/test/results/clientpositive/llap/mm_exim.q.out index 37d3952d37..62995f7e75 100644 --- a/ql/src/test/results/clientpositive/llap/mm_exim.q.out +++ b/ql/src/test/results/clientpositive/llap/mm_exim.q.out @@ -642,6 +642,7 @@ Table Type: EXTERNAL_TABLE Table Parameters: EXTERNAL TRUE bucketing_version 2 + discover.partitions true numFiles 3 numRows 6 rawDataSize 37 diff --git 
a/ql/src/test/results/clientpositive/llap/strict_managed_tables2.q.out b/ql/src/test/results/clientpositive/llap/strict_managed_tables2.q.out index f3b6152cd6..348266c0a0 100644 --- a/ql/src/test/results/clientpositive/llap/strict_managed_tables2.q.out +++ b/ql/src/test/results/clientpositive/llap/strict_managed_tables2.q.out @@ -49,6 +49,7 @@ LOCATION #### A masked pattern was here #### TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: create table smt2_tab2 (c1 string, c2 string) PREHOOK: type: CREATETABLE @@ -137,6 +138,7 @@ LOCATION #### A masked pattern was here #### TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: create table smt2_tab5 (c1 string, c2 string) PREHOOK: type: CREATETABLE diff --git a/ql/src/test/results/clientpositive/llap/table_nonprintable.q.out b/ql/src/test/results/clientpositive/llap/table_nonprintable.q.out index 8221b8ca46..9dc8710090 100644 --- a/ql/src/test/results/clientpositive/llap/table_nonprintable.q.out +++ b/ql/src/test/results/clientpositive/llap/table_nonprintable.q.out @@ -26,8 +26,8 @@ POSTHOOK: query: msck repair table table_external POSTHOOK: type: MSCK POSTHOOK: Output: default@table_external Partitions not in metastore: table_external:day=¢Bar -Repair: Cannot add partition table_external:day=Foo due to invalid characters in the name #### A masked pattern was here #### +Repair: Cannot add partition table_external:day=Foo due to invalid characters in the name Found 2 items drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ###Foo drwxr-xr-x - ### USER ### ### GROUP ### 0 ### HDFS DATE ### hdfs://### HDFS PATH ###¢Bar diff --git a/ql/src/test/results/clientpositive/llap/whroot_external1.q.out b/ql/src/test/results/clientpositive/llap/whroot_external1.q.out index cac158c926..4333bf43b6 100644 --- a/ql/src/test/results/clientpositive/llap/whroot_external1.q.out +++ b/ql/src/test/results/clientpositive/llap/whroot_external1.q.out @@ -72,6 +72,7 @@ LOCATION 'hdfs://### HDFS PATH ###' TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: insert into table wre1_ext1 select * from src where key < 5 PREHOOK: type: QUERY @@ -157,6 +158,7 @@ LOCATION 'hdfs://### HDFS PATH ###' TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: insert into table wre1_ext2 select * from src where key < 5 PREHOOK: type: QUERY @@ -246,6 +248,7 @@ LOCATION 'hdfs://### HDFS PATH ###' TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: insert into table wre1_db.wre1_ext3 select * from src where key < 5 PREHOOK: type: QUERY @@ -331,6 +334,7 @@ LOCATION 'hdfs://### HDFS PATH ###' TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: insert into table wre1_db.wre1_ext4 select * from src where key < 5 PREHOOK: type: QUERY @@ -413,6 +417,7 @@ OUTPUTFORMAT LOCATION 'hdfs://### HDFS PATH ###' TBLPROPERTIES ( + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: insert into table wre1_ext5 select * from src where key < 5 PREHOOK: type: QUERY @@ -495,6 +500,7 @@ OUTPUTFORMAT LOCATION 'hdfs://### HDFS PATH ###' TBLPROPERTIES ( + 'discover.partitions'='true', #### A masked pattern was here #### 
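The recurring golden-file change above, 'discover.partitions'='true' appearing under TBLPROPERTIES, shows that newly created external tables are now tagged for automatic partition discovery. A minimal HiveQL sketch of how a user might inspect or override the flag; the table name wre1_demo and its location are illustrative, not part of this patch, and overriding assumes the property is honored the same way when set manually:

CREATE EXTERNAL TABLE wre1_demo (key STRING, value STRING)
PARTITIONED BY (p1 STRING)
LOCATION '/tmp/wre1_demo';

-- 'discover.partitions' is expected to show up in the table parameters
DESCRIBE FORMATTED wre1_demo;

-- opt a single table out of automatic partition discovery
ALTER TABLE wre1_demo SET TBLPROPERTIES ('discover.partitions'='false');
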
PREHOOK: query: insert into table wre1_db.wre1_ext6 select * from src where key < 5 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/msck_repair_acid.q.out b/ql/src/test/results/clientpositive/msck_repair_acid.q.out new file mode 100644 index 0000000000..902a4b7d80 --- /dev/null +++ b/ql/src/test/results/clientpositive/msck_repair_acid.q.out @@ -0,0 +1,88 @@ +PREHOOK: query: DROP TABLE IF EXISTS repairtable_n6 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS repairtable_n6 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE repairtable_n6(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) STORED AS ORC tblproperties ("transactional"="true", "transactional_properties"="insert_only") +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@repairtable_n6 +POSTHOOK: query: CREATE TABLE repairtable_n6(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) STORED AS ORC tblproperties ("transactional"="true", "transactional_properties"="insert_only") +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repairtable_n6 +PREHOOK: query: EXPLAIN LOCKS MSCK TABLE repairtable_n6 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n6 +POSTHOOK: query: EXPLAIN LOCKS MSCK TABLE repairtable_n6 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n6 +LOCK INFORMATION: +default.repairtable_n6 -> SHARED_READ +PREHOOK: query: MSCK TABLE repairtable_n6 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n6 +POSTHOOK: query: MSCK TABLE repairtable_n6 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n6 +PREHOOK: query: show partitions repairtable_n6 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n6 +POSTHOOK: query: show partitions repairtable_n6 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n6 +PREHOOK: query: EXPLAIN LOCKS MSCK REPAIR TABLE default.repairtable_n6 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n6 +POSTHOOK: query: EXPLAIN LOCKS MSCK REPAIR TABLE default.repairtable_n6 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n6 +LOCK INFORMATION: +default.repairtable_n6 -> EXCLUSIVE +PREHOOK: query: MSCK REPAIR TABLE default.repairtable_n6 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n6 +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable_n6 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n6 +Partitions not in metastore: repairtable_n6:p1=a/p2=b repairtable_n6:p1=c/p2=d +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable_n6 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n6 +POSTHOOK: query: show partitions default.repairtable_n6 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n6 +p1=a/p2=b +p1=c/p2=d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN LOCKS MSCK REPAIR TABLE default.repairtable_n6 DROP PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n6 +POSTHOOK: query: EXPLAIN LOCKS MSCK REPAIR TABLE default.repairtable_n6 DROP PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n6 +LOCK INFORMATION: +default.repairtable_n6 -> EXCLUSIVE +PREHOOK: query: MSCK REPAIR TABLE default.repairtable_n6 DROP PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n6 +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable_n6 DROP PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n6 +Partitions missing from 
filesystem: repairtable_n6:p1=c/p2=d +Repair: Dropped partition from metastore hive.default.repairtable_n6:p1=c/p2=d +PREHOOK: query: show partitions default.repairtable_n6 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n6 +POSTHOOK: query: show partitions default.repairtable_n6 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n6 +p1=a/p2=b +PREHOOK: query: DROP TABLE default.repairtable_n6 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@repairtable_n6 +PREHOOK: Output: default@repairtable_n6 +POSTHOOK: query: DROP TABLE default.repairtable_n6 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@repairtable_n6 +POSTHOOK: Output: default@repairtable_n6 diff --git a/ql/src/test/results/clientpositive/msck_repair_drop.q.out b/ql/src/test/results/clientpositive/msck_repair_drop.q.out index 2456734810..27b718c670 100644 --- a/ql/src/test/results/clientpositive/msck_repair_drop.q.out +++ b/ql/src/test/results/clientpositive/msck_repair_drop.q.out @@ -58,16 +58,16 @@ POSTHOOK: query: MSCK REPAIR TABLE default.repairtable_n1 DROP PARTITIONS POSTHOOK: type: MSCK POSTHOOK: Output: default@repairtable_n1 Partitions missing from filesystem: repairtable_n1:p1=2/p2=21 repairtable_n1:p1=2/p2=210 repairtable_n1:p1=2/p2=22 repairtable_n1:p1=2/p2=23 repairtable_n1:p1=2/p2=24 repairtable_n1:p1=2/p2=25 repairtable_n1:p1=2/p2=26 repairtable_n1:p1=2/p2=27 repairtable_n1:p1=2/p2=28 repairtable_n1:p1=2/p2=29 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=21 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=210 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=22 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=23 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=24 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=25 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=26 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=27 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=28 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=29 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=21 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=210 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=22 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=23 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=24 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=25 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=26 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=27 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=28 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=29 PREHOOK: query: show partitions default.repairtable_n1 PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@repairtable_n1 @@ -124,16 +124,16 @@ POSTHOOK: query: MSCK REPAIR TABLE default.repairtable_n1 DROP PARTITIONS POSTHOOK: type: MSCK POSTHOOK: Output: default@repairtable_n1 Partitions missing from filesystem: repairtable_n1:p1=2/p2=21 repairtable_n1:p1=2/p2=210 repairtable_n1:p1=2/p2=22 repairtable_n1:p1=2/p2=23 repairtable_n1:p1=2/p2=24 repairtable_n1:p1=2/p2=25 repairtable_n1:p1=2/p2=26 repairtable_n1:p1=2/p2=27 
repairtable_n1:p1=2/p2=28 repairtable_n1:p1=2/p2=29 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=21 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=210 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=22 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=23 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=24 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=25 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=26 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=27 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=28 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=29 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=21 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=210 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=22 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=23 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=24 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=25 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=26 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=27 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=28 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=29 PREHOOK: query: show partitions default.repairtable_n1 PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@repairtable_n1 @@ -190,16 +190,16 @@ POSTHOOK: query: MSCK REPAIR TABLE default.repairtable_n1 DROP PARTITIONS POSTHOOK: type: MSCK POSTHOOK: Output: default@repairtable_n1 Partitions missing from filesystem: repairtable_n1:p1=2/p2=21 repairtable_n1:p1=2/p2=210 repairtable_n1:p1=2/p2=22 repairtable_n1:p1=2/p2=23 repairtable_n1:p1=2/p2=24 repairtable_n1:p1=2/p2=25 repairtable_n1:p1=2/p2=26 repairtable_n1:p1=2/p2=27 repairtable_n1:p1=2/p2=28 repairtable_n1:p1=2/p2=29 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=21 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=210 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=22 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=23 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=24 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=25 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=26 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=27 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=28 -Repair: Dropped partition from metastore default.repairtable_n1:p1=2/p2=29 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=21 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=210 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=22 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=23 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=24 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=25 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=26 +Repair: Dropped 
partition from metastore hive.default.repairtable_n1:p1=2/p2=27 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=28 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=2/p2=29 PREHOOK: query: show partitions default.repairtable_n1 PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@repairtable_n1 @@ -279,8 +279,8 @@ POSTHOOK: type: MSCK POSTHOOK: Output: default@repairtable_n1 Partitions not in metastore: repairtable_n1:p1=5/p2=51 repairtable_n1:p1=5/p2=52 Partitions missing from filesystem: repairtable_n1:p1=3/p2=31 repairtable_n1:p1=3/p2=32 -Repair: Dropped partition from metastore default.repairtable_n1:p1=3/p2=31 -Repair: Dropped partition from metastore default.repairtable_n1:p1=3/p2=32 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=3/p2=31 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=3/p2=32 PREHOOK: query: show partitions default.repairtable_n1 PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@repairtable_n1 @@ -309,8 +309,8 @@ POSTHOOK: Output: default@repairtable_n1 Partitions not in metastore: repairtable_n1:p1=5/p2=51 repairtable_n1:p1=5/p2=52 Partitions missing from filesystem: repairtable_n1:p1=4/p2=41 repairtable_n1:p1=4/p2=42 #### A masked pattern was here #### -Repair: Dropped partition from metastore default.repairtable_n1:p1=4/p2=41 -Repair: Dropped partition from metastore default.repairtable_n1:p1=4/p2=42 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=4/p2=41 +Repair: Dropped partition from metastore hive.default.repairtable_n1:p1=4/p2=42 PREHOOK: query: show partitions default.repairtable_n1 PREHOOK: type: SHOWPARTITIONS PREHOOK: Input: default@repairtable_n1 diff --git a/ql/src/test/results/clientpositive/partition_discovery.q.out b/ql/src/test/results/clientpositive/partition_discovery.q.out new file mode 100644 index 0000000000..907513639e --- /dev/null +++ b/ql/src/test/results/clientpositive/partition_discovery.q.out @@ -0,0 +1,357 @@ +PREHOOK: query: DROP TABLE IF EXISTS repairtable_n7 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS repairtable_n7 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS repairtable_n8 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS repairtable_n8 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS repairtable_n9 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS repairtable_n9 +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE IF EXISTS repairtable_n10 +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE IF EXISTS repairtable_n10 +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE EXTERNAL TABLE repairtable_n7(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@repairtable_n7 +POSTHOOK: query: CREATE EXTERNAL TABLE repairtable_n7(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repairtable_n7 +PREHOOK: query: describe formatted repairtable_n7 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@repairtable_n7 +POSTHOOK: query: describe formatted repairtable_n7 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@repairtable_n7 +# col_name data_type comment +col string + +# Partition Information +# 
col_name data_type comment +p1 string +p2 string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: EXTERNAL_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + EXTERNAL TRUE + bucketing_version 2 + discover.partitions true + numFiles 0 + numPartitions 0 + numRows 0 + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable_n7 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n7 +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable_n7 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n7 +Partitions not in metastore: repairtable_n7:p1=a/p2=b repairtable_n7:p1=c/p2=d +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable_n7 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n7 +POSTHOOK: query: show partitions default.repairtable_n7 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n7 +p1=a/p2=b +p1=c/p2=d +PREHOOK: query: CREATE EXTERNAL TABLE repairtable_n8 LIKE repairtable_n7 +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@repairtable_n8 +POSTHOOK: query: CREATE EXTERNAL TABLE repairtable_n8 LIKE repairtable_n7 +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repairtable_n8 +PREHOOK: query: describe formatted repairtable_n8 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@repairtable_n8 +POSTHOOK: query: describe formatted repairtable_n8 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@repairtable_n8 +# col_name data_type comment +col string + +# Partition Information +# col_name data_type comment +p1 string +p2 string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: EXTERNAL_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + EXTERNAL TRUE + discover.partitions true + numFiles 0 + numPartitions 0 + numRows 0 + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable_n8 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n8 +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable_n8 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n8 +Partitions not in metastore: repairtable_n8:p1=a/p2=b repairtable_n8:p1=c/p2=d +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable_n8 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n8 +POSTHOOK: query: show partitions 
default.repairtable_n8 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n8 +p1=a/p2=b +p1=c/p2=d +PREHOOK: query: CREATE EXTERNAL TABLE repairtable_n9(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +#### A masked pattern was here #### +PREHOOK: type: CREATETABLE +#### A masked pattern was here #### +PREHOOK: Output: database:default +PREHOOK: Output: default@repairtable_n9 +POSTHOOK: query: CREATE EXTERNAL TABLE repairtable_n9(col STRING) PARTITIONED BY (p1 STRING, p2 STRING) +#### A masked pattern was here #### +POSTHOOK: type: CREATETABLE +#### A masked pattern was here #### +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repairtable_n9 +PREHOOK: query: describe formatted repairtable_n9 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@repairtable_n9 +POSTHOOK: query: describe formatted repairtable_n9 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@repairtable_n9 +# col_name data_type comment +col string + +# Partition Information +# col_name data_type comment +p1 string +p2 string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: EXTERNAL_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + EXTERNAL TRUE + bucketing_version 2 + discover.partitions true + numFiles 0 + numPartitions 0 + numRows 0 + partition.retention.period 10s + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +InputFormat: org.apache.hadoop.mapred.TextInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable_n9 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n9 +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable_n9 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n9 +Partitions not in metastore: repairtable_n9:p1=a/p2=b repairtable_n9:p1=c/p2=d +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable_n9 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n9 +POSTHOOK: query: show partitions default.repairtable_n9 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n9 +p1=a/p2=b +p1=c/p2=d +PREHOOK: query: MSCK REPAIR TABLE default.repairtable_n9 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n9 +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable_n9 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n9 +Expired partitions (retention period: 10s) : repairtable_n9:p1=a/p2=b repairtable_n9:p1=c/p2=d +PREHOOK: query: show partitions default.repairtable_n9 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n9 +POSTHOOK: query: show partitions default.repairtable_n9 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n9 +p1=a/p2=b +p1=c/p2=d +PREHOOK: query: MSCK REPAIR TABLE default.repairtable_n9 SYNC PARTITIONS +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n9 +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable_n9 SYNC PARTITIONS +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n9 +Expired partitions (retention period: 10s) : repairtable_n9:p1=a/p2=b repairtable_n9:p1=c/p2=d +Repair: Dropped partition from metastore hive.default.repairtable_n9:p1=a/p2=b +Repair: Dropped 
partition from metastore hive.default.repairtable_n9:p1=c/p2=d +PREHOOK: query: show partitions default.repairtable_n9 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n9 +POSTHOOK: query: show partitions default.repairtable_n9 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n9 +PREHOOK: query: CREATE EXTERNAL TABLE repairtable_n10 PARTITIONED BY(p1,p2) STORED AS ORC AS SELECT * FROM repairtable_n9 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@repairtable_n9 +PREHOOK: Output: database:default +PREHOOK: Output: default@repairtable_n10 +PREHOOK: Output: default@repairtable_n10 +POSTHOOK: query: CREATE EXTERNAL TABLE repairtable_n10 PARTITIONED BY(p1,p2) STORED AS ORC AS SELECT * FROM repairtable_n9 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@repairtable_n9 +POSTHOOK: Output: database:default +POSTHOOK: Output: default@repairtable_n10 +PREHOOK: query: describe formatted repairtable_n10 +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@repairtable_n10 +POSTHOOK: query: describe formatted repairtable_n10 +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@repairtable_n10 +# col_name data_type comment +col string + +# Partition Information +# col_name data_type comment +p1 string +p2 string + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: EXTERNAL_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + EXTERNAL TRUE + bucketing_version 2 + discover.partitions true + numFiles 0 + numPartitions 0 + numRows 0 + rawDataSize 0 + totalSize 0 +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: MSCK REPAIR TABLE default.repairtable_n10 +PREHOOK: type: MSCK +PREHOOK: Output: default@repairtable_n10 +POSTHOOK: query: MSCK REPAIR TABLE default.repairtable_n10 +POSTHOOK: type: MSCK +POSTHOOK: Output: default@repairtable_n10 +Partitions not in metastore: repairtable_n10:p1=a/p2=b repairtable_n10:p1=c/p2=d +#### A masked pattern was here #### +PREHOOK: query: show partitions default.repairtable_n10 +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@repairtable_n10 +POSTHOOK: query: show partitions default.repairtable_n10 +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@repairtable_n10 +p1=a/p2=b +p1=c/p2=d +PREHOOK: query: DROP TABLE default.repairtable_n7 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@repairtable_n7 +PREHOOK: Output: default@repairtable_n7 +POSTHOOK: query: DROP TABLE default.repairtable_n7 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@repairtable_n7 +POSTHOOK: Output: default@repairtable_n7 +PREHOOK: query: DROP TABLE default.repairtable_n8 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@repairtable_n8 +PREHOOK: Output: default@repairtable_n8 +POSTHOOK: query: DROP TABLE default.repairtable_n8 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@repairtable_n8 +POSTHOOK: Output: default@repairtable_n8 +PREHOOK: query: DROP TABLE default.repairtable_n9 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@repairtable_n9 +PREHOOK: Output: default@repairtable_n9 +POSTHOOK: query: DROP TABLE default.repairtable_n9 +POSTHOOK: type: DROPTABLE 
+POSTHOOK: Input: default@repairtable_n9 +POSTHOOK: Output: default@repairtable_n9 +PREHOOK: query: DROP TABLE default.repairtable_n10 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@repairtable_n10 +PREHOOK: Output: default@repairtable_n10 +POSTHOOK: query: DROP TABLE default.repairtable_n10 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@repairtable_n10 +POSTHOOK: Output: default@repairtable_n10 diff --git a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out index 02cd814dfe..d854887c65 100644 --- a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out +++ b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out @@ -103,6 +103,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} EXTERNAL TRUE bucketing_version 2 + discover.partitions true numFiles 1 numPartitions 1 numRows 10 @@ -266,6 +267,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} EXTERNAL TRUE bucketing_version 2 + discover.partitions true numFiles 1 numPartitions 1 numRows 10 diff --git a/ql/src/test/results/clientpositive/repl_2_exim_basic.q.out b/ql/src/test/results/clientpositive/repl_2_exim_basic.q.out index b2bcd51310..40b6ad7246 100644 --- a/ql/src/test/results/clientpositive/repl_2_exim_basic.q.out +++ b/ql/src/test/results/clientpositive/repl_2_exim_basic.q.out @@ -345,6 +345,7 @@ LOCATION #### A masked pattern was here #### TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: select * from ext_t_imported PREHOOK: type: QUERY @@ -426,6 +427,7 @@ LOCATION TBLPROPERTIES ( 'EXTERNAL'='FALSE', 'bucketing_version'='2', + 'discover.partitions'='true', 'repl.last.id'='0', #### A masked pattern was here #### PREHOOK: query: select * from ext_t_r_imported diff --git a/ql/src/test/results/clientpositive/show_create_table_alter.q.out b/ql/src/test/results/clientpositive/show_create_table_alter.q.out index 2c75c36a20..9d93ee9739 100644 --- a/ql/src/test/results/clientpositive/show_create_table_alter.q.out +++ b/ql/src/test/results/clientpositive/show_create_table_alter.q.out @@ -32,6 +32,7 @@ LOCATION #### A masked pattern was here #### TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: ALTER TABLE tmp_showcrt1_n1 SET TBLPROPERTIES ('comment'='temporary table', 'EXTERNAL'='FALSE') PREHOOK: type: ALTERTABLE_PROPERTIES @@ -67,6 +68,7 @@ LOCATION TBLPROPERTIES ( 'EXTERNAL'='FALSE', 'bucketing_version'='2', + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: ALTER TABLE tmp_showcrt1_n1 SET TBLPROPERTIES ('comment'='changed comment', 'EXTERNAL'='TRUE') PREHOOK: type: ALTERTABLE_PROPERTIES @@ -101,6 +103,7 @@ LOCATION #### A masked pattern was here #### TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: ALTER TABLE tmp_showcrt1_n1 SET TBLPROPERTIES ('SORTBUCKETCOLSPREFIX'='FALSE') PREHOOK: type: ALTERTABLE_PROPERTIES @@ -135,6 +138,7 @@ LOCATION #### A masked pattern was here #### TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: ALTER TABLE tmp_showcrt1_n1 SET TBLPROPERTIES ('storage_handler'='org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler') PREHOOK: type: ALTERTABLE_PROPERTIES @@ -169,6 +173,7 @@ LOCATION #### A 
masked pattern was here #### TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: DROP TABLE tmp_showcrt1_n1 PREHOOK: type: DROPTABLE diff --git a/ql/src/test/results/clientpositive/show_create_table_partitioned.q.out b/ql/src/test/results/clientpositive/show_create_table_partitioned.q.out index e554a18e6b..8a56bfcf1e 100644 --- a/ql/src/test/results/clientpositive/show_create_table_partitioned.q.out +++ b/ql/src/test/results/clientpositive/show_create_table_partitioned.q.out @@ -32,6 +32,7 @@ LOCATION #### A masked pattern was here #### TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: DROP TABLE tmp_showcrt1_n2 PREHOOK: type: DROPTABLE diff --git a/ql/src/test/results/clientpositive/show_create_table_serde.q.out b/ql/src/test/results/clientpositive/show_create_table_serde.q.out index 8b95c9b98e..a66c09a4d2 100644 --- a/ql/src/test/results/clientpositive/show_create_table_serde.q.out +++ b/ql/src/test/results/clientpositive/show_create_table_serde.q.out @@ -174,6 +174,7 @@ LOCATION #### A masked pattern was here #### TBLPROPERTIES ( 'bucketing_version'='2', + 'discover.partitions'='true', #### A masked pattern was here #### PREHOOK: query: DROP TABLE tmp_showcrt1_n0 PREHOOK: type: DROPTABLE diff --git a/ql/src/test/results/clientpositive/spark/stats_noscan_2.q.out b/ql/src/test/results/clientpositive/spark/stats_noscan_2.q.out index 99c83aa760..9e4596fc63 100644 --- a/ql/src/test/results/clientpositive/spark/stats_noscan_2.q.out +++ b/ql/src/test/results/clientpositive/spark/stats_noscan_2.q.out @@ -49,6 +49,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} EXTERNAL TRUE bucketing_version 2 + discover.partitions true numFiles 1 totalSize 11 #### A masked pattern was here #### @@ -90,6 +91,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} EXTERNAL TRUE bucketing_version 2 + discover.partitions true numFiles 1 numRows 6 rawDataSize 6 diff --git a/ql/src/test/results/clientpositive/stats_noscan_2.q.out b/ql/src/test/results/clientpositive/stats_noscan_2.q.out index 891e868b8f..146cbe7e5e 100644 --- a/ql/src/test/results/clientpositive/stats_noscan_2.q.out +++ b/ql/src/test/results/clientpositive/stats_noscan_2.q.out @@ -49,6 +49,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} EXTERNAL TRUE bucketing_version 2 + discover.partitions true numFiles 1 totalSize 11 #### A masked pattern was here #### @@ -90,6 +91,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} EXTERNAL TRUE bucketing_version 2 + discover.partitions true numFiles 1 numRows 6 rawDataSize 6 diff --git a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out index 2a442b4822..065cd98506 100644 --- a/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out +++ b/ql/src/test/results/clientpositive/temp_table_display_colstats_tbllvl.q.out @@ -61,6 +61,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adrevenue\":\"true\",\"avgtimeonsite\":\"true\",\"ccode\":\"true\",\"desturl\":\"true\",\"lcode\":\"true\",\"skeyword\":\"true\",\"sourceip\":\"true\",\"useragent\":\"true\",\"visitdate\":\"true\"}} EXTERNAL TRUE bucketing_version 2 + discover.partitions true numFiles 0 numRows 0 rawDataSize 0 @@ -111,6 +112,7 @@ Table Type: EXTERNAL_TABLE Table 
Parameters: EXTERNAL TRUE bucketing_version 2 + discover.partitions true numFiles 1 numRows 0 rawDataSize 0 @@ -267,6 +269,7 @@ STAGE PLANS: columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite columns.comments columns.types string:string:string:float:string:string:string:string:int + discover.partitions true field.delim | #### A masked pattern was here #### name default.uservisits_web_text_none @@ -289,6 +292,7 @@ STAGE PLANS: columns sourceip,desturl,visitdate,adrevenue,useragent,ccode,lcode,skeyword,avgtimeonsite columns.comments columns.types string:string:string:float:string:string:string:string:int + discover.partitions true field.delim | #### A masked pattern was here #### name default.uservisits_web_text_none @@ -381,6 +385,7 @@ Table Parameters: COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"adRevenue\":\"true\",\"avgTimeOnSite\":\"true\",\"sourceIP\":\"true\"}} EXTERNAL TRUE bucketing_version 2 + discover.partitions true numFiles 1 numRows 55 rawDataSize 7005 diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java new file mode 100644 index 0000000000..1d89e12d89 --- /dev/null +++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -0,0 +1,948 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore.utils; + +import java.beans.PropertyDescriptor; +import java.io.File; +import java.net.URL; +import java.net.URLClassLoader; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.TimeZone; +import java.util.function.Predicate; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.regex.PatternSyntaxException; +import java.util.stream.Collectors; + +import static java.util.regex.Pattern.compile; + +import javax.annotation.Nullable; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.ColumnType; +import org.apache.hadoop.hive.metastore.TableType; +import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.WMPoolSchedulingPolicy; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; +import org.apache.hadoop.security.SaslRpcServer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Joiner; + +public class MetaStoreUtils { + /** A fixed date format to be used for hive partition column values. */ + public static final ThreadLocal<DateFormat> PARTITION_DATE_FORMAT = + new ThreadLocal<DateFormat>() { + @Override + protected DateFormat initialValue() { + DateFormat val = new SimpleDateFormat("yyyy-MM-dd"); + val.setLenient(false); // Without this, 2020-20-20 becomes 2021-08-20. + val.setTimeZone(TimeZone.getTimeZone("UTC")); + return val; + } + }; + // Indicates a type was derived from the deserializer rather than Hive's metadata. + public static final String TYPE_FROM_DESERIALIZER = ""; + + private static final Logger LOG = LoggerFactory.getLogger(MetaStoreUtils.class); + + // The following two are public for any external users who wish to use them. + /** + * This character is used to mark a database name as having a catalog name prepended. This + * marker should be placed first in the String to make it easy to determine that this has both + * a catalog and a database name. @ is chosen as it is not used in regular expressions. This + * is only intended for use when making old Thrift calls that do not support catalog names. + */ + public static final char CATALOG_DB_THRIFT_NAME_MARKER = '@'; + + /** + * This String is used to separate the catalog name from the database name. This should only + * be used in Strings that are prepended with {@link #CATALOG_DB_THRIFT_NAME_MARKER}. # is + * chosen because it is not used in regular expressions.
This is only intended for use when + * making old Thrift calls that do not support catalog names. + */ + public static final String CATALOG_DB_SEPARATOR = "#"; + + /** + * Mark a database as being empty (as distinct from null). + */ + public static final String DB_EMPTY_MARKER = "!"; + + public static final String EXTERNAL_TABLE_PURGE = "external.table.purge"; + + // Right now we only support one special character '/'. + // More special characters can be added accordingly in the future. + // NOTE: + // If the following array is updated, please also be sure to update the + // configuration parameter documentation + // HIVE_SUPPORT_SPECICAL_CHARACTERS_IN_TABLE_NAMES in HiveConf as well. + private static final char[] specialCharactersInTableNames = new char[] { '/' }; + + /** + * Catches exceptions that can't be handled and bundles them into a MetaException. + * + * @param e exception to wrap. + * @throws MetaException wrapper for the exception + */ + public static void logAndThrowMetaException(Exception e) throws MetaException { + String exInfo = "Got exception: " + e.getClass().getName() + " " + + e.getMessage(); + LOG.error(exInfo, e); + LOG.error("Converting exception to MetaException"); + throw new MetaException(exInfo); + } + + public static String encodeTableName(String name) { + // The encoding method is simple, e.g., replace + // all the special characters with the corresponding number in ASCII. + // Note that unicode is not supported in table names. And we have explicit + // checks for it. + StringBuilder sb = new StringBuilder(); + for (char ch : name.toCharArray()) { + if (Character.isLetterOrDigit(ch) || ch == '_') { + sb.append(ch); + } else { + sb.append('-').append((int) ch).append('-'); + } + } + return sb.toString(); + } + + /** + * Convert an Exception to a MetaException, setting the given exception as its cause. + * @param e cause of the exception + * @return the MetaException with the specified exception as the cause + */ + public static MetaException newMetaException(Exception e) { + return newMetaException(e != null ? e.getMessage() : null, e); + } + + /** + * Convert an Exception to a MetaException, setting the given exception as its cause. + * @param errorMessage the error message for this MetaException + * @param e cause of the exception + * @return the MetaException with the specified exception as the cause + */ + public static MetaException newMetaException(String errorMessage, Exception e) { + MetaException metaException = new MetaException(errorMessage); + if (e != null) { + metaException.initCause(e); + } + return metaException; + } + + + public static List<String> getColumnNamesForTable(Table table) { + List<String> colNames = new ArrayList<>(); + Iterator<FieldSchema> colsIterator = table.getSd().getColsIterator(); + while (colsIterator.hasNext()) { + colNames.add(colsIterator.next().getName()); + } + return colNames; + } + + /** + * validateName + * + * Checks the name conforms to our standards, which are: "[a-zA-Z_0-9]+". Checks + * this is just characters and numbers and _. + * + * @param name + * the name to validate + * @param conf + * hive configuration + * @return true if the name conforms, false + * if it doesn't match the pattern.
+ */ + public static boolean validateName(String name, Configuration conf) { + Pattern tpat; + String allowedCharacters = "\\w_"; + if (conf != null + && MetastoreConf.getBoolVar(conf, + MetastoreConf.ConfVars.SUPPORT_SPECICAL_CHARACTERS_IN_TABLE_NAMES)) { + for (Character c : specialCharactersInTableNames) { + allowedCharacters += c; + } + } + tpat = Pattern.compile("[" + allowedCharacters + "]+"); + Matcher m = tpat.matcher(name); + return m.matches(); + } + + /** + * Determines whether a table is an external table. + * + * @param table table of interest + * + * @return true if external + */ + public static boolean isExternalTable(Table table) { + if (table == null) { + return false; + } + Map<String, String> params = table.getParameters(); + if (params == null) { + return false; + } + + return isExternal(params); + } + + /** + * Determines whether a table needs to be purged or not. + * + * @param table table of interest + * + * @return true if external table needs to be purged + */ + public static boolean isExternalTablePurge(Table table) { + if (table == null) { + return false; + } + Map<String, String> params = table.getParameters(); + if (params == null) { + return false; + } + + return isPropertyTrue(params, EXTERNAL_TABLE_PURGE); + } + + public static boolean isExternal(Map<String, String> tableParams){ + return isPropertyTrue(tableParams, "EXTERNAL"); + } + + public static boolean isPropertyTrue(Map<String, String> tableParams, String prop) { + return "TRUE".equalsIgnoreCase(tableParams.get(prop)); + } + + + /** Duplicates AcidUtils; used in a couple places in metastore. */ + public static boolean isInsertOnlyTableParam(Map<String, String> params) { + String transactionalProp = params.get(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES); + return (transactionalProp != null && "insert_only".equalsIgnoreCase(transactionalProp)); + } + + public static boolean isNonNativeTable(Table table) { + if (table == null || table.getParameters() == null) { + return false; + } + return (table.getParameters().get(hive_metastoreConstants.META_TABLE_STORAGE) != null); + } + + /** + * Given a list of partition columns and a partial mapping from + * some partition columns to values, the function returns the values + * for the columns. + * @param partCols the list of table partition columns + * @param partSpec the partial mapping from partition column to values + * @return list of values for the given partition columns; any missing + * value in partSpec is replaced by an empty string + */ + public static List<String> getPvals(List<FieldSchema> partCols, + Map<String, String> partSpec) { + List<String> pvals = new ArrayList<>(partCols.size()); + for (FieldSchema field : partCols) { + String val = StringUtils.defaultString(partSpec.get(field.getName())); + pvals.add(val); + } + return pvals; + } + public static String makePartNameMatcher(Table table, List<String> partVals, String defaultStr) throws MetaException { + List<FieldSchema> partCols = table.getPartitionKeys(); + int numPartKeys = partCols.size(); + if (partVals.size() > numPartKeys) { + throw new MetaException("Incorrect number of partition values." + + " numPartKeys=" + numPartKeys + ", part_val=" + partVals); + } + partCols = partCols.subList(0, partVals.size()); + // Construct a pattern of the form: partKey=partVal/partKey2=partVal2/... + // where partVal is either the escaped partition value given as input, + // or a regex of the form ".*" + // This works because the "=" and "/" separating key names and partition key/values + // are not escaped.
+ String partNameMatcher = Warehouse.makePartName(partCols, partVals, defaultStr); + // add ".*" to the regex to match anything else afterwards the partial spec. + if (partVals.size() < numPartKeys) { + partNameMatcher += defaultStr; + } + return partNameMatcher; + } + + /** + * @param schema1: The first schema to be compared + * @param schema2: The second schema to be compared + * @return true if the two schemas are the same else false + * for comparing a field we ignore the comment it has + */ + public static boolean compareFieldColumns(List schema1, List schema2) { + if (schema1.size() != schema2.size()) { + return false; + } + Iterator its1 = schema1.iterator(); + Iterator its2 = schema2.iterator(); + while (its1.hasNext()) { + FieldSchema f1 = its1.next(); + FieldSchema f2 = its2.next(); + // The default equals provided by thrift compares the comments too for + // equality, thus we need to compare the relevant fields here. + if (!StringUtils.equals(f1.getName(), f2.getName()) || + !StringUtils.equals(f1.getType(), f2.getType())) { + return false; + } + } + return true; + } + + public static boolean isArchived(Partition part) { + Map params = part.getParameters(); + return (params != null && "TRUE".equalsIgnoreCase(params.get(hive_metastoreConstants.IS_ARCHIVED))); + } + + public static Path getOriginalLocation(Partition part) { + Map params = part.getParameters(); + assert(isArchived(part)); + String originalLocation = params.get(hive_metastoreConstants.ORIGINAL_LOCATION); + assert( originalLocation != null); + + return new Path(originalLocation); + } + + private static String ARCHIVING_LEVEL = "archiving_level"; + public static int getArchivingLevel(Partition part) throws MetaException { + if (!isArchived(part)) { + throw new MetaException("Getting level of unarchived partition"); + } + + String lv = part.getParameters().get(ARCHIVING_LEVEL); + if (lv != null) { + return Integer.parseInt(lv); + } + // partitions archived before introducing multiple archiving + return part.getValues().size(); + } + + /** + * Read and return the meta store Sasl configuration. Currently it uses the default + * Hadoop SASL configuration and can be configured using "hadoop.rpc.protection" + * HADOOP-10211, made a backward incompatible change due to which this call doesn't + * work with Hadoop 2.4.0 and later. + * @param conf + * @return The SASL configuration + */ + public static Map getMetaStoreSaslProperties(Configuration conf, boolean useSSL) { + // As of now Hive Meta Store uses the same configuration as Hadoop SASL configuration + + // If SSL is enabled, override the given value of "hadoop.rpc.protection" and set it to "authentication" + // This disables any encryption provided by SASL, since SSL already provides it + String hadoopRpcProtectionVal = conf.get(CommonConfigurationKeysPublic.HADOOP_RPC_PROTECTION); + String hadoopRpcProtectionAuth = SaslRpcServer.QualityOfProtection.AUTHENTICATION.toString(); + + if (useSSL && hadoopRpcProtectionVal != null && !hadoopRpcProtectionVal.equals(hadoopRpcProtectionAuth)) { + LOG.warn("Overriding value of " + CommonConfigurationKeysPublic.HADOOP_RPC_PROTECTION + " setting it from " + + hadoopRpcProtectionVal + " to " + hadoopRpcProtectionAuth + " because SSL is enabled"); + conf.set(CommonConfigurationKeysPublic.HADOOP_RPC_PROTECTION, hadoopRpcProtectionAuth); + } + return HadoopThriftAuthBridge.getBridge().getHadoopSaslProperties(conf); + } + + /** + * Add new elements to the classpath. 
+ * + * @param newPaths + * Array of classpath elements + */ + public static ClassLoader addToClassPath(ClassLoader cloader, String[] newPaths) throws Exception { + URLClassLoader loader = (URLClassLoader) cloader; + List curPath = Arrays.asList(loader.getURLs()); + ArrayList newPath = new ArrayList<>(curPath.size()); + + // get a list with the current classpath components + for (URL onePath : curPath) { + newPath.add(onePath); + } + curPath = newPath; + + for (String onestr : newPaths) { + URL oneurl = urlFromPathString(onestr); + if (oneurl != null && !curPath.contains(oneurl)) { + curPath.add(oneurl); + } + } + + return new URLClassLoader(curPath.toArray(new URL[0]), loader); + } + + /** + * Create a URL from a string representing a path to a local file. + * The path string can be just a path, or can start with file:/, file:/// + * @param onestr path string + * @return + */ + private static URL urlFromPathString(String onestr) { + URL oneurl = null; + try { + if (onestr.startsWith("file:/")) { + oneurl = new URL(onestr); + } else { + oneurl = new File(onestr).toURL(); + } + } catch (Exception err) { + LOG.error("Bad URL " + onestr + ", ignoring path"); + } + return oneurl; + } + + /** + * Convert FieldSchemas to Thrift DDL. + */ + public static String getDDLFromFieldSchema(String structName, + List fieldSchemas) { + StringBuilder ddl = new StringBuilder(); + ddl.append("struct "); + ddl.append(structName); + ddl.append(" { "); + boolean first = true; + for (FieldSchema col : fieldSchemas) { + if (first) { + first = false; + } else { + ddl.append(", "); + } + ddl.append(ColumnType.typeToThriftType(col.getType())); + ddl.append(' '); + ddl.append(col.getName()); + } + ddl.append("}"); + + LOG.trace("DDL: {}", ddl); + return ddl.toString(); + } + + public static Properties getTableMetadata( + org.apache.hadoop.hive.metastore.api.Table table) { + return MetaStoreUtils.getSchema(table.getSd(), table.getSd(), table + .getParameters(), table.getDbName(), table.getTableName(), table.getPartitionKeys()); + } + + public static Properties getPartitionMetadata( + org.apache.hadoop.hive.metastore.api.Partition partition, + org.apache.hadoop.hive.metastore.api.Table table) { + return MetaStoreUtils + .getSchema(partition.getSd(), partition.getSd(), partition + .getParameters(), table.getDbName(), table.getTableName(), + table.getPartitionKeys()); + } + + public static Properties getSchema( + org.apache.hadoop.hive.metastore.api.Partition part, + org.apache.hadoop.hive.metastore.api.Table table) { + return MetaStoreUtils.getSchema(part.getSd(), table.getSd(), table + .getParameters(), table.getDbName(), table.getTableName(), table.getPartitionKeys()); + } + + /** + * Get partition level schema from table level schema. + * This function will use the same column names, column types and partition keys for + * each partition Properties. Their values are copied from the table Properties. This + * is mainly to save CPU and memory. CPU is saved because the first time the + * StorageDescriptor column names are accessed, JDO needs to execute a SQL query to + * retrieve the data. If we know the data will be the same as the table level schema + * and they are immutable, we should just reuse the table level schema objects. + * + * @param sd The Partition level Storage Descriptor. + * @param parameters partition level parameters + * @param tblSchema The table level schema from which this partition should be copied. 
+ * @return the properties + */ + public static Properties getPartSchemaFromTableSchema( + StorageDescriptor sd, + Map<String, String> parameters, + Properties tblSchema) { + + // Inherit most properties from the table level schema and overwrite some properties + // in the following code. + // This is mainly for saving CPU and memory to reuse the column names, types and + // partition columns in the table level schema. + Properties schema = (Properties) tblSchema.clone(); + + // InputFormat + String inputFormat = sd.getInputFormat(); + if (inputFormat == null || inputFormat.length() == 0) { + String tblInput = + schema.getProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT); + if (tblInput == null) { + inputFormat = org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName(); + } else { + inputFormat = tblInput; + } + } + schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT, + inputFormat); + + // OutputFormat + String outputFormat = sd.getOutputFormat(); + if (outputFormat == null || outputFormat.length() == 0) { + String tblOutput = + schema.getProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_OUTPUT_FORMAT); + if (tblOutput == null) { + outputFormat = org.apache.hadoop.mapred.SequenceFileOutputFormat.class.getName(); + } else { + outputFormat = tblOutput; + } + } + schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_OUTPUT_FORMAT, + outputFormat); + + // Location + if (sd.getLocation() != null) { + schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION, + sd.getLocation()); + } + + // Bucket count + schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_COUNT, + Integer.toString(sd.getNumBuckets())); + + if (sd.getBucketCols() != null && sd.getBucketCols().size() > 0) { + schema.setProperty(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_FIELD_NAME, + Joiner.on(",").join(sd.getBucketCols())); + } + + // SerdeInfo + if (sd.getSerdeInfo() != null) { + + // We should not update the following 3 values if SerDeInfo contains these. + // This is to keep backward compatibility with getSchema(), where these 3 keys + // are updated after SerDeInfo properties got copied. + String cols = org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS; + String colTypes = org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES; + String parts = org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS; + + for (Map.Entry<String, String> param : sd.getSerdeInfo().getParameters().entrySet()) { + String key = param.getKey(); + if (schema.get(key) != null && + (key.equals(cols) || key.equals(colTypes) || key.equals(parts) || + // skip Druid properties which are used in DruidSerde, since they are also updated + // after SerDeInfo properties are copied. + key.startsWith("druid."))) { + continue; + } + schema.put(key, (param.getValue() != null) ?
param.getValue() : StringUtils.EMPTY); + } + + if (sd.getSerdeInfo().getSerializationLib() != null) { + schema.setProperty(ColumnType.SERIALIZATION_LIB, sd.getSerdeInfo().getSerializationLib()); + } + } + + // skipping columns since partition level field schemas are the same as table level's + // skipping partition keys since it is the same as table level partition keys + + if (parameters != null) { + for (Map.Entry e : parameters.entrySet()) { + schema.setProperty(e.getKey(), e.getValue()); + } + } + + return schema; + } + + private static Properties addCols(Properties schema, List cols) { + + StringBuilder colNameBuf = new StringBuilder(); + StringBuilder colTypeBuf = new StringBuilder(); + StringBuilder colComment = new StringBuilder(); + + boolean first = true; + String columnNameDelimiter = getColumnNameDelimiter(cols); + for (FieldSchema col : cols) { + if (!first) { + colNameBuf.append(columnNameDelimiter); + colTypeBuf.append(":"); + colComment.append('\0'); + } + colNameBuf.append(col.getName()); + colTypeBuf.append(col.getType()); + colComment.append((null != col.getComment()) ? col.getComment() : StringUtils.EMPTY); + first = false; + } + schema.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMNS, + colNameBuf.toString()); + schema.setProperty(ColumnType.COLUMN_NAME_DELIMITER, columnNameDelimiter); + String colTypes = colTypeBuf.toString(); + schema.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_COLUMN_TYPES, + colTypes); + schema.setProperty("columns.comments", colComment.toString()); + + return schema; + + } + + public static Properties getSchemaWithoutCols(StorageDescriptor sd, + Map parameters, String databaseName, String tableName, + List partitionKeys) { + Properties schema = new Properties(); + String inputFormat = sd.getInputFormat(); + if (inputFormat == null || inputFormat.length() == 0) { + inputFormat = org.apache.hadoop.mapred.SequenceFileInputFormat.class + .getName(); + } + schema.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_INPUT_FORMAT, + inputFormat); + String outputFormat = sd.getOutputFormat(); + if (outputFormat == null || outputFormat.length() == 0) { + outputFormat = org.apache.hadoop.mapred.SequenceFileOutputFormat.class + .getName(); + } + schema.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.FILE_OUTPUT_FORMAT, + outputFormat); + + schema.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME, + databaseName + "." + tableName); + + if (sd.getLocation() != null) { + schema.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION, + sd.getLocation()); + } + schema.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_COUNT, Integer + .toString(sd.getNumBuckets())); + if (sd.getBucketCols() != null && sd.getBucketCols().size() > 0) { + schema.setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_FIELD_NAME, + Joiner.on(",").join(sd.getBucketCols())); + } + if (sd.getSerdeInfo() != null) { + for (Map.Entry param : sd.getSerdeInfo().getParameters().entrySet()) { + schema.put(param.getKey(), (param.getValue() != null) ? 
param.getValue() : StringUtils.EMPTY); + } + + if (sd.getSerdeInfo().getSerializationLib() != null) { + schema.setProperty(ColumnType.SERIALIZATION_LIB, sd .getSerdeInfo().getSerializationLib()); + } + } + + if (sd.getCols() != null) { + schema.setProperty(ColumnType.SERIALIZATION_DDL, getDDLFromFieldSchema(tableName, sd.getCols())); + } + + String partString = StringUtils.EMPTY; + String partStringSep = StringUtils.EMPTY; + String partTypesString = StringUtils.EMPTY; + String partTypesStringSep = StringUtils.EMPTY; + for (FieldSchema partKey : partitionKeys) { + partString = partString.concat(partStringSep); + partString = partString.concat(partKey.getName()); + partTypesString = partTypesString.concat(partTypesStringSep); + partTypesString = partTypesString.concat(partKey.getType()); + if (partStringSep.length() == 0) { + partStringSep = "/"; + partTypesStringSep = ":"; + } + } + if (partString.length() > 0) { + schema + .setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS, + partString); + schema + .setProperty( + org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES, + partTypesString); + } + + if (parameters != null) { + for (Map.Entry e : parameters.entrySet()) { + // add non-null parameters to the schema + if ( e.getValue() != null) { + schema.setProperty(e.getKey(), e.getValue()); + } + } + } + + return schema; + } + + public static Properties getSchema( + org.apache.hadoop.hive.metastore.api.StorageDescriptor sd, + org.apache.hadoop.hive.metastore.api.StorageDescriptor tblsd, + Map parameters, String databaseName, String tableName, + List partitionKeys) { + + return addCols(getSchemaWithoutCols(sd, parameters, databaseName, tableName, partitionKeys), tblsd.getCols()); + } + + public static String getColumnNameDelimiter(List fieldSchemas) { + // we first take a look if any fieldSchemas contain COMMA + for (int i = 0; i < fieldSchemas.size(); i++) { + if (fieldSchemas.get(i).getName().contains(",")) { + return String.valueOf(ColumnType.COLUMN_COMMENTS_DELIMITER); + } + } + return String.valueOf(','); + } + + /** + * Convert FieldSchemas to columnNames. + */ + public static String getColumnNamesFromFieldSchema(List fieldSchemas) { + String delimiter = getColumnNameDelimiter(fieldSchemas); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < fieldSchemas.size(); i++) { + if (i > 0) { + sb.append(delimiter); + } + sb.append(fieldSchemas.get(i).getName()); + } + return sb.toString(); + } + + /** + * Convert FieldSchemas to columnTypes. 
+ */ + public static String getColumnTypesFromFieldSchema( + List fieldSchemas) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < fieldSchemas.size(); i++) { + if (i > 0) { + sb.append(","); + } + sb.append(fieldSchemas.get(i).getType()); + } + return sb.toString(); + } + + public static String getColumnCommentsFromFieldSchema(List fieldSchemas) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < fieldSchemas.size(); i++) { + if (i > 0) { + sb.append(ColumnType.COLUMN_COMMENTS_DELIMITER); + } + sb.append(fieldSchemas.get(i).getComment()); + } + return sb.toString(); + } + + public static boolean isMaterializedViewTable(Table table) { + if (table == null) { + return false; + } + return TableType.MATERIALIZED_VIEW.toString().equals(table.getTableType()); + } + + public static List getColumnNames(List schema) { + List cols = new ArrayList<>(schema.size()); + for (FieldSchema fs : schema) { + cols.add(fs.getName()); + } + return cols; + } + + public static boolean isValidSchedulingPolicy(String str) { + try { + parseSchedulingPolicy(str); + return true; + } catch (IllegalArgumentException ex) { + } + return false; + } + + public static WMPoolSchedulingPolicy parseSchedulingPolicy(String schedulingPolicy) { + if (schedulingPolicy == null) { + return WMPoolSchedulingPolicy.FAIR; + } + schedulingPolicy = schedulingPolicy.trim().toUpperCase(); + if ("DEFAULT".equals(schedulingPolicy)) { + return WMPoolSchedulingPolicy.FAIR; + } + return Enum.valueOf(WMPoolSchedulingPolicy.class, schedulingPolicy); + } + + private static boolean hasCatalogName(String dbName) { + return dbName != null && dbName.length() > 0 && + dbName.charAt(0) == CATALOG_DB_THRIFT_NAME_MARKER; + } + + /** + * Given a catalog name and database name cram them together into one string. This method can + * be used if you do not know the catalog name, in which case the default catalog will be + * retrieved from the conf object. The resulting string can be parsed apart again via + * {@link #parseDbName(String, Configuration)}. + * @param catalogName catalog name, can be null if no known. + * @param dbName database name, can be null or empty. + * @param conf configuration object, used to determine default catalog if catalogName is null + * @return one string that contains both. + */ + public static String prependCatalogToDbName(@Nullable String catalogName, @Nullable String dbName, + Configuration conf) { + if (catalogName == null) catalogName = getDefaultCatalog(conf); + StringBuilder buf = new StringBuilder() + .append(CATALOG_DB_THRIFT_NAME_MARKER) + .append(catalogName) + .append(CATALOG_DB_SEPARATOR); + if (dbName != null) { + if (dbName.isEmpty()) buf.append(DB_EMPTY_MARKER); + else buf.append(dbName); + } + return buf.toString(); + } + + /** + * Given a catalog name and database name, cram them together into one string. These can be + * parsed apart again via {@link #parseDbName(String, Configuration)}. + * @param catalogName catalog name. This cannot be null. If this might be null use + * {@link #prependCatalogToDbName(String, String, Configuration)} instead. + * @param dbName database name. + * @return one string that contains both. + */ + public static String prependNotNullCatToDbName(String catalogName, String dbName) { + assert catalogName != null; + return prependCatalogToDbName(catalogName, dbName, null); + } + + /** + * Prepend the default 'hive' catalog onto the database name. 
+ * @param dbName database name + * @param conf configuration object, used to determine default catalog + * @return one string with the 'hive' catalog name prepended. + */ + public static String prependCatalogToDbName(String dbName, Configuration conf) { + return prependCatalogToDbName(null, dbName, conf); + } + + private final static String[] nullCatalogAndDatabase = {null, null}; + + /** + * Parse the catalog name out of the database name. If no catalog name is present then the + * default catalog (as set in configuration file) will be assumed. + * @param dbName name of the database. This may or may not contain the catalog name. + * @param conf configuration object, used to determine the default catalog if it is not present + * in the database name. + * @return an array of two elements, the first being the catalog name, the second the database + * name. + * @throws MetaException if the name is not either just a database name or a catalog plus + * database name with the proper delimiters. + */ + public static String[] parseDbName(String dbName, Configuration conf) throws MetaException { + if (dbName == null) return nullCatalogAndDatabase; + if (hasCatalogName(dbName)) { + if (dbName.endsWith(CATALOG_DB_SEPARATOR)) { + // This means the DB name is null + return new String[] {dbName.substring(1, dbName.length() - 1), null}; + } else if (dbName.endsWith(DB_EMPTY_MARKER)) { + // This means the DB name is empty + return new String[] {dbName.substring(1, dbName.length() - DB_EMPTY_MARKER.length() - 1), ""}; + } + String[] names = dbName.substring(1).split(CATALOG_DB_SEPARATOR, 2); + if (names.length != 2) { + throw new MetaException(dbName + " is prepended with the catalog marker but does not " + + "appear to have a catalog name in it"); + } + return names; + } else { + return new String[] {getDefaultCatalog(conf), dbName}; + } + } + + /** + * Position in the array returned by {@link #parseDbName} that has the catalog name. + */ + public static final int CAT_NAME = 0; + /** + * Position in the array returned by {@link #parseDbName} that has the database name. + */ + public static final int DB_NAME = 1; + + public static String getDefaultCatalog(Configuration conf) { + if (conf == null) { + LOG.warn("Configuration is null, so going with default catalog."); + return Warehouse.DEFAULT_CATALOG_NAME; + } + String catName = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.CATALOG_DEFAULT); + if (catName == null || "".equals(catName)) catName = Warehouse.DEFAULT_CATALOG_NAME; + return catName; + } + + public static boolean isView(Table table) { + if (table == null) { + return false; + } + return TableType.VIRTUAL_VIEW.toString().equals(table.getTableType()); + } + + /** + * filters a given map with predicate provided. All entries of map whose key matches with + * predicate will be removed. Expects map to be modifiable and does the operation on actual map, + * so does not return a copy of filtered map. + * @param map A map of String key-value pairs + * @param predicate Predicate with pattern to filter the map + */ + public static void filterMapKeys(Map map, Predicate predicate) { + if (map == null) { + return; + } + map.entrySet().removeIf(entry -> predicate.test(entry.getKey())); + } + + /** + * filters a given map with list of predicates. All entries of map whose key matches with any + * predicate will be removed. Expects map to be modifiable and does the operation on actual map, + * so does not return a copy of filtered map. 
+ * @param map A map of String key-value pairs + * @param predicates List of predicates with patterns to filter the map + */ + public static void filterMapkeys(Map map, List> predicates) { + if (map == null) { + return; + } + filterMapKeys(map, predicates.stream().reduce(Predicate::or).orElse(x -> false)); + } + + /** + * Compile a list of regex patterns and collect them as Predicates. + * @param patterns List of regex patterns to be compiled + * @return a List of Predicate created by compiling the regex patterns + */ + public static List> compilePatternsToPredicates(List patterns) { + return patterns.stream().map(pattern -> compile(pattern).asPredicate()).collect(Collectors.toList()); + } +} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/CheckResult.java similarity index 90% rename from ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java rename to standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/CheckResult.java index 0b4240f566..5287f47e21 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/CheckResult.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hive.ql.metadata; +package org.apache.hadoop.hive.metastore; import java.util.Set; import java.util.TreeSet; @@ -25,10 +25,12 @@ */ public class CheckResult { + // tree sets to preserve ordering in qfile tests private Set tablesNotOnFs = new TreeSet(); private Set tablesNotInMs = new TreeSet(); private Set partitionsNotOnFs = new TreeSet(); private Set partitionsNotInMs = new TreeSet(); + private Set expiredPartitions = new TreeSet<>(); /** * @return a list of tables not found on the filesystem. @@ -90,6 +92,15 @@ public void setPartitionsNotInMs(Set partitionsNotInMs) { this.partitionsNotInMs = partitionsNotInMs; } + public Set getExpiredPartitions() { + return expiredPartitions; + } + + public void setExpiredPartitions( + final Set expiredPartitions) { + this.expiredPartitions = expiredPartitions; + } + /** * A basic description of a partition that is missing from either the fs or * the ms. diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java similarity index 71% rename from ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java rename to standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java index 598bb2ee8b..8d27a8207e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java @@ -15,9 +15,20 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.hadoop.hive.ql.metadata; +package org.apache.hadoop.hive.metastore; + +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getAllPartitionsOf; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getDataLocation; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getPartColNames; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getPartCols; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getPartition; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getPartitionName; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getPartitionSpec; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.getPath; +import static org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.isPartitioned; import java.io.IOException; +import java.time.Instant; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; @@ -33,27 +44,26 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.ThreadFactory; -import java.util.concurrent.ThreadPoolExecutor; -import com.google.common.collect.Sets; -import org.apache.hadoop.hive.common.StringInternUtils; -import org.apache.hadoop.hive.metastore.api.FieldSchema; -import org.apache.hadoop.hive.ql.log.PerfLogger; -import org.apache.hadoop.hive.ql.session.SessionState; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.conf.HiveConf.ConfVars; -import org.apache.hadoop.hive.metastore.Warehouse; +import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.ql.metadata.CheckResult.PartitionResult; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.utils.FileUtils; +import org.apache.hadoop.hive.metastore.utils.MetastoreException; import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import com.google.common.collect.Interner; +import com.google.common.collect.Interners; +import com.google.common.collect.Sets; import com.google.common.util.concurrent.MoreExecutors; import com.google.common.util.concurrent.ThreadFactoryBuilder; @@ -65,21 +75,33 @@ public class HiveMetaStoreChecker { public static final Logger LOG = LoggerFactory.getLogger(HiveMetaStoreChecker.class); - public static final String CLASS_NAME = HiveMetaStoreChecker.class.getName(); - private final Hive hive; - private final HiveConf conf; + private final IMetaStoreClient msc; + private final Configuration conf; + private final long partitionExpirySeconds; + private final Interner pathInterner = Interners.newStrongInterner(); + + public HiveMetaStoreChecker(IMetaStoreClient msc, Configuration conf) { + this(msc, conf, -1); + } - public HiveMetaStoreChecker(Hive hive) { + public HiveMetaStoreChecker(IMetaStoreClient msc, Configuration conf, long partitionExpirySeconds) { super(); - this.hive = hive; - conf = hive.getConf(); + this.msc = msc; + 
this.conf = conf; + this.partitionExpirySeconds = partitionExpirySeconds; + } + + public IMetaStoreClient getMsc() { + return msc; } /** * Check the metastore for inconsistencies, data missing in either the * metastore or on the dfs. * + * @param catName + * name of the catalog, if not specified default catalog will be used. * @param dbName * name of the database, if not specified the default will be used. * @param tableName @@ -90,14 +112,14 @@ public HiveMetaStoreChecker(Hive hive) { * partitions * @param result * Fill this with the results of the check - * @throws HiveException + * @throws MetastoreException * Failed to get required information from the metastore. * @throws IOException * Most likely filesystem related */ - public void checkMetastore(String dbName, String tableName, + public void checkMetastore(String catName, String dbName, String tableName, List> partitions, CheckResult result) - throws HiveException, IOException { + throws MetastoreException, IOException { if (dbName == null || "".equalsIgnoreCase(dbName)) { dbName = Warehouse.DEFAULT_DATABASE_NAME; @@ -106,41 +128,40 @@ public void checkMetastore(String dbName, String tableName, try { if (tableName == null || "".equals(tableName)) { // no table specified, check all tables and all partitions. - List tables = hive.getTablesForDb(dbName, ".*"); + List tables = getMsc().getTables(catName, dbName, ".*"); for (String currentTableName : tables) { - checkTable(dbName, currentTableName, null, result); + checkTable(catName, dbName, currentTableName, null, result); } - findUnknownTables(dbName, tables, result); + findUnknownTables(catName, dbName, tables, result); } else if (partitions == null || partitions.isEmpty()) { // only one table, let's check all partitions - checkTable(dbName, tableName, null, result); + checkTable(catName, dbName, tableName, null, result); } else { // check the specified partitions - checkTable(dbName, tableName, partitions, result); + checkTable(catName, dbName, tableName, partitions, result); } LOG.info("Number of partitionsNotInMs=" + result.getPartitionsNotInMs() + ", partitionsNotOnFs=" + result.getPartitionsNotOnFs() + ", tablesNotInMs=" + result.getTablesNotInMs() - + ", tablesNotOnFs=" + result.getTablesNotOnFs()); - } catch (MetaException e) { - throw new HiveException(e); + + ", tablesNotOnFs=" + result.getTablesNotOnFs() + + ", expiredPartitions=" + result.getExpiredPartitions()); } catch (TException e) { - throw new HiveException(e); + throw new MetastoreException(e); } } /** * Check for table directories that aren't in the metastore. * + * @param catName + * name of the catalog, if not specified default catalog will be used. * @param dbName * Name of the database * @param tables * List of table names * @param result * Add any found tables to this - * @throws HiveException - * Failed to get required information from the metastore. * @throws IOException * Most likely filesystem related * @throws MetaException @@ -150,18 +171,21 @@ public void checkMetastore(String dbName, String tableName, * @throws TException * Thrift communication error. 
*/ - void findUnknownTables(String dbName, List tables, CheckResult result) - throws IOException, MetaException, TException, HiveException { + void findUnknownTables(String catName, String dbName, List tables, CheckResult result) + throws IOException, MetaException, TException { Set dbPaths = new HashSet(); Set tableNames = new HashSet(tables); for (String tableName : tables) { - Table table = hive.getTable(dbName, tableName); + Table table = getMsc().getTable(catName, dbName, tableName); // hack, instead figure out a way to get the db paths String isExternal = table.getParameters().get("EXTERNAL"); - if (isExternal == null || !"TRUE".equalsIgnoreCase(isExternal)) { - dbPaths.add(table.getPath().getParent()); + if (!"TRUE".equalsIgnoreCase(isExternal)) { + Path tablePath = getPath(table); + if (tablePath != null) { + dbPaths.add(tablePath.getParent()); + } } } @@ -182,6 +206,8 @@ void findUnknownTables(String dbName, List tables, CheckResult result) * Check the metastore for inconsistencies, data missing in either the * metastore or on the dfs. * + * @param catName + * name of the catalog, if not specified default catalog will be used. * @param dbName * Name of the database * @param tableName @@ -190,22 +216,22 @@ void findUnknownTables(String dbName, List tables, CheckResult result) * Partitions to check, if null or empty get all the partitions. * @param result * Result object - * @throws HiveException + * @throws MetastoreException * Failed to get required information from the metastore. * @throws IOException * Most likely filesystem related * @throws MetaException * Failed to get required information from the metastore. */ - void checkTable(String dbName, String tableName, + void checkTable(String catName, String dbName, String tableName, List> partitions, CheckResult result) - throws MetaException, IOException, HiveException { + throws MetaException, IOException, MetastoreException { - Table table = null; + Table table; try { - table = hive.getTable(dbName, tableName); - } catch (HiveException e) { + table = getMsc().getTable(catName, dbName, tableName); + } catch (TException e) { result.getTablesNotInMs().add(tableName); return; } @@ -213,18 +239,13 @@ void checkTable(String dbName, String tableName, PartitionIterable parts; boolean findUnknownPartitions = true; - if (table.isPartitioned()) { + if (isPartitioned(table)) { if (partitions == null || partitions.isEmpty()) { - String mode = HiveConf.getVar(conf, ConfVars.HIVEMAPREDMODE, (String) null); - if ("strict".equalsIgnoreCase(mode)) { - parts = new PartitionIterable(hive, table, null, conf.getIntVar( - HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX)); + int batchSize = MetastoreConf.getIntVar(conf, MetastoreConf.ConfVars.BATCH_RETRIEVE_MAX); + if (batchSize > 0) { + parts = new PartitionIterable(getMsc(), table, batchSize); } else { - List loadedPartitions = new ArrayList<>(); - PerfLogger perfLogger = SessionState.getPerfLogger(); - perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING); - loadedPartitions.addAll(hive.getAllPartitionsOf(table)); - perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.PARTITION_RETRIEVING); + List loadedPartitions = getAllPartitionsOf(getMsc(), table); parts = new PartitionIterable(loadedPartitions); } } else { @@ -233,9 +254,9 @@ void checkTable(String dbName, String tableName, findUnknownPartitions = false; List loadedPartitions = new ArrayList<>(); for (Map map : partitions) { - Partition part = hive.getPartition(table, map, false); + Partition part = getPartition(getMsc(), table, map); if 
(part == null) { - PartitionResult pr = new PartitionResult(); + CheckResult.PartitionResult pr = new CheckResult.PartitionResult(); pr.setTableName(tableName); pr.setPartitionName(Warehouse.makePartPath(map)); result.getPartitionsNotInMs().add(pr); @@ -266,14 +287,17 @@ void checkTable(String dbName, String tableName, * Should we try to find unknown partitions? * @throws IOException * Could not get information from filesystem - * @throws HiveException + * @throws MetastoreException * Could not create Partition object */ void checkTable(Table table, PartitionIterable parts, boolean findUnknownPartitions, CheckResult result) throws IOException, - HiveException { + MetastoreException { - Path tablePath = table.getPath(); + Path tablePath = getPath(table); + if (tablePath == null) { + return; + } FileSystem fs = tablePath.getFileSystem(conf); if (!fs.exists(tablePath)) { result.getTablesNotOnFs().add(table.getTableName()); @@ -288,18 +312,38 @@ void checkTable(Table table, PartitionIterable parts, // most likely the user specified an invalid partition continue; } - Path partPath = partition.getDataLocation(); + Path partPath = getDataLocation(table, partition); + if (partPath == null) { + continue; + } fs = partPath.getFileSystem(conf); if (!fs.exists(partPath)) { - PartitionResult pr = new PartitionResult(); - pr.setPartitionName(partition.getName()); - pr.setTableName(partition.getTable().getTableName()); + CheckResult.PartitionResult pr = new CheckResult.PartitionResult(); + pr.setPartitionName(getPartitionName(table, partition)); + pr.setTableName(partition.getTableName()); result.getPartitionsNotOnFs().add(pr); } - for (int i = 0; i < partition.getSpec().size(); i++) { + if (partitionExpirySeconds > 0) { + long currentEpochSecs = Instant.now().getEpochSecond(); + long createdTime = partition.getCreateTime(); + long partitionAgeSeconds = currentEpochSecs - createdTime; + if (partitionAgeSeconds > partitionExpirySeconds) { + CheckResult.PartitionResult pr = new CheckResult.PartitionResult(); + pr.setPartitionName(getPartitionName(table, partition)); + pr.setTableName(partition.getTableName()); + result.getExpiredPartitions().add(pr); + if (LOG.isDebugEnabled()) { + LOG.debug("{}.{}.{}.{} expired. createdAt: {} current: {} age: {}s expiry: {}s", partition.getCatName(), + partition.getDbName(), partition.getTableName(), pr.getPartitionName(), createdTime, currentEpochSecs, + partitionAgeSeconds, partitionExpirySeconds); + } + } + } + + for (int i = 0; i < getPartitionSpec(table, partition).size(); i++) { Path qualifiedPath = partPath.makeQualified(fs); - StringInternUtils.internUriStringsInPath(qualifiedPath); + pathInterner.intern(qualifiedPath); partPaths.add(qualifiedPath); partPath = partPath.getParent(); } @@ -321,16 +365,19 @@ void checkTable(Table table, PartitionIterable parts, * Result object * @throws IOException * Thrown if we fail at fetching listings from the fs. 
- * @throws HiveException + * @throws MetastoreException */ void findUnknownPartitions(Table table, Set partPaths, - CheckResult result) throws IOException, HiveException { + CheckResult result) throws IOException, MetastoreException { - Path tablePath = table.getPath(); + Path tablePath = getPath(table); + if (tablePath == null) { + return; + } // now check the table folder and see if we find anything // that isn't in the metastore Set allPartDirs = new HashSet(); - checkPartitionDirs(tablePath, allPartDirs, Collections.unmodifiableList(table.getPartColNames())); + checkPartitionDirs(tablePath, allPartDirs, Collections.unmodifiableList(getPartColNames(table))); // don't want the table dir allPartDirs.remove(tablePath); @@ -338,7 +385,7 @@ void findUnknownPartitions(Table table, Set partPaths, allPartDirs.removeAll(partPaths); Set partColNames = Sets.newHashSet(); - for(FieldSchema fSchema : table.getPartCols()) { + for(FieldSchema fSchema : getPartCols(table)) { partColNames.add(fSchema.getName()); } @@ -350,7 +397,7 @@ void findUnknownPartitions(Table table, Set partPaths, LOG.debug("PartitionName: " + partitionName); if (partitionName != null) { - PartitionResult pr = new PartitionResult(); + CheckResult.PartitionResult pr = new CheckResult.PartitionResult(); pr.setPartitionName(partitionName); pr.setTableName(table.getTableName()); @@ -360,48 +407,6 @@ void findUnknownPartitions(Table table, Set partPaths, LOG.debug("Number of partitions not in metastore : " + result.getPartitionsNotInMs().size()); } - /** - * Get the partition name from the path. - * - * @param tablePath - * Path of the table. - * @param partitionPath - * Path of the partition. - * @param partCols - * Set of partition columns from table definition - * @return Partition name, for example partitiondate=2008-01-01 - */ - static String getPartitionName(Path tablePath, Path partitionPath, - Set partCols) { - String result = null; - Path currPath = partitionPath; - LOG.debug("tablePath:" + tablePath + ", partCols: " + partCols); - - while (currPath != null && !tablePath.equals(currPath)) { - // format: partition=p_val - // Add only when table partition colName matches - String[] parts = currPath.getName().split("="); - if (parts != null && parts.length > 0) { - if (parts.length != 2) { - LOG.warn(currPath.getName() + " is not a valid partition name"); - return result; - } - - String partitionName = parts[0]; - if (partCols.contains(partitionName)) { - if (result == null) { - result = currPath.getName(); - } else { - result = currPath.getName() + Path.SEPARATOR + result; - } - } - } - currPath = currPath.getParent(); - LOG.debug("currPath=" + currPath); - } - return result; - } - /** * Assume that depth is 2, i.e., partition columns are a and b * tblPath/a=1 => throw exception @@ -414,20 +419,20 @@ static String getPartitionName(Path tablePath, Path partitionPath, * Start directory * @param allDirs * This set will contain the leaf paths at the end. - * @param list - * Specify how deep the search goes. + * @param partColNames + * Partition column names * @throws IOException * Thrown if we can't get lists from the fs. 
- * @throws HiveException + * @throws MetastoreException */ - private void checkPartitionDirs(Path basePath, Set allDirs, final List partColNames) throws IOException, HiveException { + private void checkPartitionDirs(Path basePath, Set allDirs, final List partColNames) throws IOException, MetastoreException { // Here we just reuse the THREAD_COUNT configuration for // METASTORE_FS_HANDLER_THREADS_COUNT since this results in better performance // The number of missing partitions discovered are later added by metastore using a // threadpool of size METASTORE_FS_HANDLER_THREADS_COUNT. If we have different sized // pool here the smaller sized pool of the two becomes a bottleneck - int poolSize = conf.getInt(ConfVars.METASTORE_FS_HANDLER_THREADS_COUNT.varname, 15); + int poolSize = MetastoreConf.getIntVar(conf, MetastoreConf.ConfVars.FS_HANDLER_THREADS_COUNT); ExecutorService executor; if (poolSize <= 1) { @@ -437,7 +442,7 @@ private void checkPartitionDirs(Path basePath, Set allDirs, final List partColNames, FileS this.pd = pd; this.fs = fs; this.pendingPaths = basePaths; - this.throwException = "throw" - .equals(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION)); + this.throwException = "throw".equals(MetastoreConf.getVar(conf, MetastoreConf.ConfVars.MSCK_PATH_VALIDATION)); } @Override @@ -467,7 +471,7 @@ public Path call() throws Exception { } private Path processPathDepthInfo(final PathDepthInfo pd) - throws IOException, HiveException, InterruptedException { + throws IOException, MetastoreException { final Path currentPath = pd.p; final int currentDepth = pd.depth; FileStatus[] fileStatuses = fs.listStatus(currentPath, FileUtils.HIDDEN_FILES_PATH_FILTER); @@ -510,9 +514,9 @@ private Path processPathDepthInfo(final PathDepthInfo pd) return null; } - private void logOrThrowExceptionWithMsg(String msg) throws HiveException { + private void logOrThrowExceptionWithMsg(String msg) throws MetastoreException { if(throwException) { - throw new HiveException(msg); + throw new MetastoreException(msg); } else { LOG.warn(msg); } @@ -530,7 +534,7 @@ private void logOrThrowExceptionWithMsg(String msg) throws HiveException { private void checkPartitionDirs(final ExecutorService executor, final Path basePath, final Set result, - final FileSystem fs, final List partColNames) throws HiveException { + final FileSystem fs, final List partColNames) throws MetastoreException { try { Queue> futures = new LinkedList>(); ConcurrentLinkedQueue nextLevel = new ConcurrentLinkedQueue<>(); @@ -561,7 +565,7 @@ private void checkPartitionDirs(final ExecutorService executor, } catch (InterruptedException | ExecutionException e) { LOG.error(e.getMessage()); executor.shutdownNow(); - throw new HiveException(e.getCause()); + throw new MetastoreException(e.getCause()); } } } diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Msck.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Msck.java new file mode 100644 index 0000000000..a61c8a3a99 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Msck.java @@ -0,0 +1,530 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.util.AbstractList; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.api.DataOperationType; +import org.apache.hadoop.hive.metastore.api.LockRequest; +import org.apache.hadoop.hive.metastore.api.LockResponse; +import org.apache.hadoop.hive.metastore.api.LockState; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.utils.FileUtils; +import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; +import org.apache.hadoop.hive.metastore.utils.MetastoreException; +import org.apache.hadoop.hive.metastore.utils.ObjectPair; +import org.apache.hadoop.hive.metastore.utils.RetryUtilities; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.thrift.TException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; + +/** + * Msck repairs table metadata specifically related to partition information to be in-sync with directories in table + * location. + */ +public class Msck { + public static final Logger LOG = LoggerFactory.getLogger(Msck.class); + public static final int separator = 9; // tabCode + private static final int terminator = 10; // newLineCode + private boolean acquireLock; + private boolean deleteData; + + private Configuration conf; + private IMetaStoreClient msc; + + public Msck(boolean acquireLock, boolean deleteData) { + this.acquireLock = acquireLock; + this.deleteData = deleteData; + } + + public Configuration getConf() { + return conf; + } + + public void setConf(final Configuration conf) { + this.conf = conf; + } + + public void init(Configuration conf) throws MetaException { + if (msc == null) { + // the only reason we are using new conf here is to override EXPRESSION_PROXY_CLASS + Configuration metastoreConf = MetastoreConf.newMetastoreConf(); + metastoreConf.addResource(conf); + metastoreConf.set(MetastoreConf.ConfVars.EXPRESSION_PROXY_CLASS.getVarname(), + MsckPartitionExpressionProxy.class.getCanonicalName()); + setConf(metastoreConf); + this.msc = new HiveMetaStoreClient(metastoreConf); + } + } + + /** + * MetastoreCheck, see if the data in the metastore matches what is on the + * dfs. Current version checks for tables and partitions that are either + * missing on disk on in the metastore. + * + * @param msckInfo Information about the tables and partitions we want to check for. + * @return Returns 0 when execution succeeds and above 0 if it fails. 
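// Illustration (hedged sketch, not part of this change): driving repair() programmatically,
// roughly what MSCK REPAIR TABLE does. Catalog/db/table names are placeholders; batch size,
// retry count and msck.path.validation are read from MetastoreConf inside repair() below.
Msck msck = new Msck(/* acquireLock */ true, /* deleteData */ false);
msck.init(conf);                                  // builds its own HiveMetaStoreClient
MsckInfo info = new MsckInfo("hive", "default", "web_logs",
    null,   // partSpecs: null means scan every partition directory under the table location
    null,   // resFile: no result file
    true,   // repairPartitions
    true,   // addPartitions: create metastore entries for directories found on the filesystem
    false,  // dropPartitions: keep partitions whose directories are gone
    -1);    // partitionExpirySeconds: retention disabled
int rc = msck.repair(info);                       // 0 on success, non-zero on failure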
+ */ + public int repair(MsckInfo msckInfo) { + CheckResult result = new CheckResult(); + List repairOutput = new ArrayList<>(); + String qualifiedTableName = null; + boolean success = false; + long txnId = -1; + int ret = 0; + try { + Table table = getMsc().getTable(msckInfo.getCatalogName(), msckInfo.getDbName(), msckInfo.getTableName()); + if (getConf().getBoolean(MetastoreConf.ConfVars.MSCK_REPAIR_ENABLE_PARTITION_RETENTION.getHiveName(), false)) { + msckInfo.setPartitionExpirySeconds(PartitionManagementTask.getRetentionPeriodInSeconds(table)); + LOG.info("Retention period ({}s) for partition is enabled for MSCK REPAIR..", msckInfo.getPartitionExpirySeconds()); + } + HiveMetaStoreChecker checker = new HiveMetaStoreChecker(getMsc(), getConf(), msckInfo.getPartitionExpirySeconds()); + // checkMetastore call will fill in result with partitions that are present in filesystem + // and missing in metastore - accessed through getPartitionsNotInMs + // And partitions that are not present in filesystem and metadata exists in metastore - + // accessed through getPartitionNotOnFS + checker.checkMetastore(msckInfo.getCatalogName(), msckInfo.getDbName(), msckInfo.getTableName(), + msckInfo.getPartSpecs(), result); + Set partsNotInMs = result.getPartitionsNotInMs(); + Set partsNotInFs = result.getPartitionsNotOnFs(); + Set expiredPartitions = result.getExpiredPartitions(); + int totalPartsToFix = partsNotInMs.size() + partsNotInFs.size() + expiredPartitions.size(); + // if nothing changed to partitions and if we are not repairing (add or drop) don't acquire for lock unnecessarily + boolean lockRequired = totalPartsToFix > 0 && + msckInfo.isRepairPartitions() && + (msckInfo.isAddPartitions() || msckInfo.isDropPartitions()); + LOG.info("#partsNotInMs: {} #partsNotInFs: {} #expiredPartitions: {} lockRequired: {} (R: {} A: {} D: {})", + partsNotInMs.size(), partsNotInFs.size(), expiredPartitions.size(), lockRequired, + msckInfo.isRepairPartitions(), msckInfo.isAddPartitions(), msckInfo.isDropPartitions()); + + if (msckInfo.isRepairPartitions()) { + // Repair metadata in HMS + qualifiedTableName = Warehouse.getCatalogQualifiedTableName(table); + long lockId; + if (acquireLock && lockRequired && table.getParameters() != null && + MetaStoreUtils.isTransactionalTable(table.getParameters())) { + // Running MSCK from beeline/cli will make DDL task acquire X lock when repair is enabled, since we are directly + // invoking msck.repair() without SQL statement, we need to do the same and acquire X lock (repair is default) + LockRequest lockRequest = createLockRequest(msckInfo.getDbName(), msckInfo.getTableName()); + txnId = lockRequest.getTxnid(); + try { + LockResponse res = getMsc().lock(lockRequest); + if (res.getState() != LockState.ACQUIRED) { + throw new MetastoreException("Unable to acquire lock(X) on " + qualifiedTableName); + } + lockId = res.getLockid(); + } catch (TException e) { + throw new MetastoreException("Unable to acquire lock(X) on " + qualifiedTableName, e); + } + LOG.info("Acquired lock(X) on {}. LockId: {}", qualifiedTableName, lockId); + } + int maxRetries = MetastoreConf.getIntVar(getConf(), MetastoreConf.ConfVars.MSCK_REPAIR_BATCH_MAX_RETRIES); + int decayingFactor = 2; + + if (msckInfo.isAddPartitions() && !partsNotInMs.isEmpty()) { + // MSCK called to add missing paritions into metastore and there are + // missing partitions. + + int batchSize = MetastoreConf.getIntVar(getConf(), MetastoreConf.ConfVars.MSCK_REPAIR_BATCH_SIZE); + if (batchSize == 0) { + //batching is not enabled. 
Try to add all the partitions in one call + batchSize = partsNotInMs.size(); + } + + AbstractList vals = null; + String settingStr = MetastoreConf.getVar(getConf(), MetastoreConf.ConfVars.MSCK_PATH_VALIDATION); + boolean doValidate = !("ignore".equals(settingStr)); + boolean doSkip = doValidate && "skip".equals(settingStr); + // The default setting is "throw"; assume doValidate && !doSkip means throw. + if (doValidate) { + // Validate that we can add partition without escaping. Escaping was originally intended + // to avoid creating invalid HDFS paths; however, if we escape the HDFS path (that we + // deem invalid but HDFS actually supports - it is possible to create HDFS paths with + // unprintable characters like ASCII 7), metastore will create another directory instead + // of the one we are trying to "repair" here. + Iterator iter = partsNotInMs.iterator(); + while (iter.hasNext()) { + CheckResult.PartitionResult part = iter.next(); + try { + vals = Warehouse.makeValsFromName(part.getPartitionName(), vals); + } catch (MetaException ex) { + throw new MetastoreException(ex); + } + for (String val : vals) { + String escapedPath = FileUtils.escapePathName(val); + assert escapedPath != null; + if (escapedPath.equals(val)) { + continue; + } + String errorMsg = "Repair: Cannot add partition " + msckInfo.getTableName() + ':' + + part.getPartitionName() + " due to invalid characters in the name"; + if (doSkip) { + repairOutput.add(errorMsg); + iter.remove(); + } else { + throw new MetastoreException(errorMsg); + } + } + } + } + try { + createPartitionsInBatches(getMsc(), repairOutput, partsNotInMs, table, batchSize, + decayingFactor, maxRetries); + } catch (Exception e) { + throw new MetastoreException(e); + } + } + + if (msckInfo.isDropPartitions() && (!partsNotInFs.isEmpty() || !expiredPartitions.isEmpty())) { + // MSCK called to drop stale paritions from metastore and there are + // stale partitions. + + int batchSize = MetastoreConf.getIntVar(getConf(), MetastoreConf.ConfVars.MSCK_REPAIR_BATCH_SIZE); + if (batchSize == 0) { + //batching is not enabled. 
Try to drop all the partitions in one call + batchSize = partsNotInFs.size() + expiredPartitions.size(); + } + + try { + dropPartitionsInBatches(getMsc(), repairOutput, partsNotInFs, expiredPartitions, table, batchSize, + decayingFactor, maxRetries); + } catch (Exception e) { + throw new MetastoreException(e); + } + } + } + success = true; + } catch (Exception e) { + LOG.warn("Failed to run metacheck: ", e); + success = false; + ret = 1; + } finally { + if (msckInfo.getResFile() != null) { + BufferedWriter resultOut = null; + try { + Path resFile = new Path(msckInfo.getResFile()); + FileSystem fs = resFile.getFileSystem(getConf()); + resultOut = new BufferedWriter(new OutputStreamWriter(fs.create(resFile))); + + boolean firstWritten = false; + firstWritten |= writeMsckResult(result.getTablesNotInMs(), + "Tables not in metastore:", resultOut, firstWritten); + firstWritten |= writeMsckResult(result.getTablesNotOnFs(), + "Tables missing on filesystem:", resultOut, firstWritten); + firstWritten |= writeMsckResult(result.getPartitionsNotInMs(), + "Partitions not in metastore:", resultOut, firstWritten); + firstWritten |= writeMsckResult(result.getPartitionsNotOnFs(), + "Partitions missing from filesystem:", resultOut, firstWritten); + firstWritten |= writeMsckResult(result.getExpiredPartitions(), + "Expired partitions (retention period: " + msckInfo.getPartitionExpirySeconds() + "s) :", resultOut, firstWritten); + // sorting to stabilize qfile output (msck_repair_drop.q) + Collections.sort(repairOutput); + for (String rout : repairOutput) { + if (firstWritten) { + resultOut.write(terminator); + } else { + firstWritten = true; + } + resultOut.write(rout); + } + } catch (IOException e) { + LOG.warn("Failed to save metacheck output: ", e); + ret = 1; + } finally { + if (resultOut != null) { + try { + resultOut.close(); + } catch (IOException e) { + LOG.warn("Failed to close output file: ", e); + ret = 1; + } + } + } + } + + LOG.info("Tables not in metastore: {}", result.getTablesNotInMs()); + LOG.info("Tables missing on filesystem: {}", result.getTablesNotOnFs()); + LOG.info("Partitions not in metastore: {}", result.getPartitionsNotInMs()); + LOG.info("Partitions missing from filesystem: {}", result.getPartitionsNotOnFs()); + LOG.info("Expired partitions: {}", result.getExpiredPartitions()); + if (acquireLock && txnId > 0) { + if (success) { + try { + LOG.info("txnId: {} succeeded. Committing..", txnId); + getMsc().commitTxn(txnId); + } catch (Exception e) { + LOG.warn("Error while committing txnId: {} for table: {}", txnId, qualifiedTableName, e); + ret = 1; + } + } else { + try { + LOG.info("txnId: {} failed. Aborting..", txnId); + getMsc().abortTxns(Lists.newArrayList(txnId)); + } catch (Exception e) { + LOG.warn("Error while aborting txnId: {} for table: {}", txnId, qualifiedTableName, e); + ret = 1; + } + } + } + if (getMsc() != null) { + getMsc().close(); + msc = null; + } + } + + return ret; + } + + private LockRequest createLockRequest(final String dbName, final String tableName) throws TException { + UserGroupInformation loggedInUser = null; + String username; + try { + loggedInUser = UserGroupInformation.getLoginUser(); + } catch (IOException e) { + LOG.warn("Unable to get logged in user via UGI. 
err: {}", e.getMessage()); + } + if (loggedInUser == null) { + username = System.getProperty("user.name"); + } else { + username = loggedInUser.getShortUserName(); + } + long txnId = getMsc().openTxn(username); + String agentInfo = Thread.currentThread().getName(); + LockRequestBuilder requestBuilder = new LockRequestBuilder(agentInfo); + requestBuilder.setUser(username); + requestBuilder.setTransactionId(txnId); + + LockComponentBuilder lockCompBuilder = new LockComponentBuilder() + .setDbName(dbName) + .setTableName(tableName) + .setIsTransactional(true) + .setExclusive() + // WriteType is DDL_EXCLUSIVE for MSCK REPAIR so we need NO_TXN. Refer AcidUtils.makeLockComponents + .setOperationType(DataOperationType.NO_TXN); + requestBuilder.addLockComponent(lockCompBuilder.build()); + + LOG.info("Created lock(X) request with info - user: {} txnId: {} agentInfo: {} dbName: {} tableName: {}", + username, txnId, agentInfo, dbName, tableName); + return requestBuilder.build(); + } + + public IMetaStoreClient getMsc() { + return msc; + } + + @VisibleForTesting + public void createPartitionsInBatches(final IMetaStoreClient metastoreClient, List repairOutput, + Set partsNotInMs, Table table, int batchSize, int decayingFactor, int maxRetries) + throws Exception { + String addMsgFormat = "Repair: Added partition to metastore " + + table.getTableName() + ":%s"; + Set batchWork = new HashSet<>(partsNotInMs); + new RetryUtilities.ExponentiallyDecayingBatchWork(batchSize, decayingFactor, maxRetries) { + @Override + public Void execute(int size) throws MetastoreException { + try { + while (!batchWork.isEmpty()) { + List partsToAdd = new ArrayList<>(); + //get the current batch size + int currentBatchSize = size; + //store the partitions temporarily until processed + List lastBatch = new ArrayList<>(currentBatchSize); + List addMsgs = new ArrayList<>(currentBatchSize); + //add the number of partitions given by the current batchsize + for (CheckResult.PartitionResult part : batchWork) { + if (currentBatchSize == 0) { + break; + } + Path tablePath = MetaStoreUtils.getPath(table); + if (tablePath == null) { + continue; + } + Map partSpec = Warehouse.makeSpecFromName(part.getPartitionName()); + Path location = new Path(tablePath, Warehouse.makePartPath(partSpec)); + Partition partition = MetaStoreUtils.createMetaPartitionObject(table, partSpec, location); + partsToAdd.add(partition); + lastBatch.add(part); + addMsgs.add(String.format(addMsgFormat, part.getPartitionName())); + currentBatchSize--; + } + metastoreClient.add_partitions(partsToAdd, true, false); + // if last batch is successful remove it from partsNotInMs + batchWork.removeAll(lastBatch); + repairOutput.addAll(addMsgs); + } + return null; + } catch (TException e) { + throw new MetastoreException(e); + } + } + }.run(); + } + + private static String makePartExpr(Map spec) + throws MetaException { + StringBuilder suffixBuf = new StringBuilder(); + int i = 0; + for (Map.Entry e : spec.entrySet()) { + if (e.getValue() == null || e.getValue().length() == 0) { + throw new MetaException("Partition spec is incorrect. " + spec); + } + if (i > 0) { + suffixBuf.append(" AND "); + } + suffixBuf.append(Warehouse.escapePathName(e.getKey())); + suffixBuf.append('='); + suffixBuf.append("'").append(Warehouse.escapePathName(e.getValue())).append("'"); + i++; + } + return suffixBuf.toString(); + } + + // Drops partitions in batches. partNotInFs is split into batches based on batchSize + // and dropped. 
The dropping will be through RetryUtilities which will retry when there is a + // failure after reducing the batchSize by decayingFactor. Retrying will cease when maxRetries + // limit is reached or batchSize reduces to 0, whichever comes earlier. + @VisibleForTesting + public void dropPartitionsInBatches(final IMetaStoreClient metastoreClient, List repairOutput, + Set partsNotInFs, Set expiredPartitions, + Table table, int batchSize, int decayingFactor, int maxRetries) throws Exception { + String dropMsgFormat = + "Repair: Dropped partition from metastore " + Warehouse.getCatalogQualifiedTableName(table) + ":%s"; + // Copy of partitions that will be split into batches + Set batchWork = new HashSet<>(partsNotInFs); + if (expiredPartitions != null && !expiredPartitions.isEmpty()) { + batchWork.addAll(expiredPartitions); + } + PartitionDropOptions dropOptions = new PartitionDropOptions().deleteData(deleteData).ifExists(true); + new RetryUtilities.ExponentiallyDecayingBatchWork(batchSize, decayingFactor, maxRetries) { + @Override + public Void execute(int size) throws MetastoreException { + try { + while (!batchWork.isEmpty()) { + int currentBatchSize = size; + + // to store the partitions that are currently being processed + List lastBatch = new ArrayList<>(currentBatchSize); + + // drop messages for the dropped partitions + List dropMsgs = new ArrayList<>(currentBatchSize); + + // Partitions to be dropped + List dropParts = new ArrayList<>(currentBatchSize); + + for (CheckResult.PartitionResult part : batchWork) { + // This batch is full: break out of for loop to execute + if (currentBatchSize == 0) { + break; + } + + dropParts.add(part.getPartitionName()); + + // Add the part to lastBatch to track the parition being dropped + lastBatch.add(part); + + // Update messages + dropMsgs.add(String.format(dropMsgFormat, part.getPartitionName())); + + // Decrement batch size. When this gets to 0, the batch will be executed + currentBatchSize--; + } + + // this call is deleting partitions that are already missing from filesystem + // so 3rd parameter (deleteData) is set to false + // msck is doing a clean up of hms. if for some reason the partition is already + // deleted, then it is good. So, the last parameter ifexists is set to true + List> partExprs = getPartitionExpr(dropParts); + metastoreClient.dropPartitions(table.getCatName(), table.getDbName(), table.getTableName(), partExprs, dropOptions); + + // if last batch is successful remove it from partsNotInFs + batchWork.removeAll(lastBatch); + repairOutput.addAll(dropMsgs); + } + return null; + } catch (TException e) { + throw new MetastoreException(e); + } + } + + private List> getPartitionExpr(final List parts) throws MetaException { + List> expr = new ArrayList<>(parts.size()); + for (int i = 0; i < parts.size(); i++) { + String partName = parts.get(i); + Map partSpec = Warehouse.makeSpecFromName(partName); + String partExpr = makePartExpr(partSpec); + if (LOG.isDebugEnabled()) { + LOG.debug("Generated partExpr: {} for partName: {}", partExpr, partName); + } + expr.add(new ObjectPair<>(i, partExpr.getBytes(StandardCharsets.UTF_8))); + } + return expr; + } + }.run(); + } + + /** + * Write the result of msck to a writer. + * + * @param result The result we're going to write + * @param msg Message to write. 
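// Illustration (hedged, placeholder values): how a discovered partition name becomes the equality
// filter used by the drop path, relying only on the Warehouse helper and the private makePartExpr
// defined above.
String partName = "year=2014/month=02";
Map<String, String> spec = Warehouse.makeSpecFromName(partName);  // {year=2014, month=02}
String filter = makePartExpr(spec);                               // year='2014' AND month='02'
// MsckPartitionExpressionProxy.convertExprToFilter() later hands this string straight back to
// the metastore, which evaluates it against the partition columns.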
+ * @param out Writer to write to + * @param wrote if any previous call wrote data + * @return true if something was written + * @throws IOException In case the writing fails + */ + private boolean writeMsckResult(Set result, String msg, + Writer out, boolean wrote) throws IOException { + + if (!result.isEmpty()) { + if (wrote) { + out.write(terminator); + } + + out.write(msg); + for (Object entry : result) { + out.write(separator); + out.write(entry.toString()); + } + return true; + } + + return false; + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MsckInfo.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MsckInfo.java new file mode 100644 index 0000000000..81bcb56b1d --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MsckInfo.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore; + +import java.util.ArrayList; +import java.util.LinkedHashMap; + +/** + * Metadata related to Msck. + */ +public class MsckInfo { + + private String catalogName; + private String dbName; + private String tableName; + private ArrayList> partSpecs; + private String resFile; + private boolean repairPartitions; + private boolean addPartitions; + private boolean dropPartitions; + private long partitionExpirySeconds; + + public MsckInfo(final String catalogName, final String dbName, final String tableName, + final ArrayList> partSpecs, final String resFile, final boolean repairPartitions, + final boolean addPartitions, + final boolean dropPartitions, + final long partitionExpirySeconds) { + this.catalogName = catalogName; + this.dbName = dbName; + this.tableName = tableName; + this.partSpecs = partSpecs; + this.resFile = resFile; + this.repairPartitions = repairPartitions; + this.addPartitions = addPartitions; + this.dropPartitions = dropPartitions; + this.partitionExpirySeconds = partitionExpirySeconds; + } + + public String getCatalogName() { + return catalogName; + } + + public void setCatalogName(final String catalogName) { + this.catalogName = catalogName; + } + + public String getDbName() { + return dbName; + } + + public void setDbName(final String dbName) { + this.dbName = dbName; + } + + public String getTableName() { + return tableName; + } + + public void setTableName(final String tableName) { + this.tableName = tableName; + } + + public ArrayList> getPartSpecs() { + return partSpecs; + } + + public void setPartSpecs(final ArrayList> partSpecs) { + this.partSpecs = partSpecs; + } + + public String getResFile() { + return resFile; + } + + public void setResFile(final String resFile) { + this.resFile = resFile; + } + + public boolean isRepairPartitions() { + return repairPartitions; + } + + public void setRepairPartitions(final boolean repairPartitions) { + this.repairPartitions = repairPartitions; + } + + public boolean isAddPartitions() { + return addPartitions; + } + + public void setAddPartitions(final boolean addPartitions) { + this.addPartitions = addPartitions; + } + + public boolean isDropPartitions() { + return dropPartitions; + } + + public void setDropPartitions(final boolean dropPartitions) { + this.dropPartitions = dropPartitions; + } + + public long getPartitionExpirySeconds() { + return partitionExpirySeconds; + } + + public void setPartitionExpirySeconds(final long partitionExpirySeconds) { + this.partitionExpirySeconds = partitionExpirySeconds; + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MsckPartitionExpressionProxy.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MsckPartitionExpressionProxy.java new file mode 100644 index 0000000000..87ab3da07b --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/MsckPartitionExpressionProxy.java @@ -0,0 +1,65 @@ +package org.apache.hadoop.hive.metastore; +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.nio.charset.StandardCharsets; +import java.util.List; + +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.FileMetadataExprType; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.ql.io.sarg.SearchArgument; + +// This is added as part of moving MSCK code from ql to standalone-metastore. There is a metastore API to drop +// partitions by name but we cannot use it because msck typically will contain partition value (year=2014). We almost +// never drop partition by name (year). So we need to construct expression filters, the current +// PartitionExpressionProxy implementations (PartitionExpressionForMetastore and HCatClientHMSImpl.ExpressionBuilder) +// all depend on ql code to build ExprNodeDesc for the partition expressions. It also depends on kryo for serializing +// the expression objects to byte[]. For MSCK drop partition, we don't need complex expression generator. For now, +// all we do is split the partition spec (year=2014/month=24) into filter expression year='2014' and month='24' and +// rely on metastore database to deal with type conversions. Ideally, PartitionExpressionProxy default implementation +// should use SearchArgument (storage-api) to construct the filter expression and not depend on ql, but the usecase +// for msck is pretty simple and this specific implementation should suffice. +public class MsckPartitionExpressionProxy implements PartitionExpressionProxy { + + @Override + public String convertExprToFilter(final byte[] expr) throws MetaException { + return new String(expr, StandardCharsets.UTF_8); + } + + @Override + public boolean filterPartitionsByExpr(List partColumns, byte[] expr, String + defaultPartitionName, List partitionNames) throws MetaException { + return false; + } + + @Override + public FileMetadataExprType getMetadataType(String inputFormat) { + throw new UnsupportedOperationException(); + } + + @Override + public FileFormatProxy getFileFormatProxy(FileMetadataExprType type) { + throw new UnsupportedOperationException(); + } + + @Override + public SearchArgument createSarg(byte[] expr) { + throw new UnsupportedOperationException(); + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index b4a4616cc8..19b2738570 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -790,10 +790,9 @@ public void rollbackTransaction() { debugLog("rolling back transaction: no open transactions: " + openTrasactionCalls); return; } - debugLog("Rollback transaction, isActive: " + currentTransaction.isActive()); + debugLog("Rollback transaction, isActive: " + isActiveTransaction()); try { - if (currentTransaction.isActive() - && transactionStatus != TXN_STATUS.ROLLBACK) { + if (isActiveTransaction() && transactionStatus != TXN_STATUS.ROLLBACK) { currentTransaction.rollback(); } } finally { @@ -1775,6 +1774,7 @@ private int getObjectCount(String fieldName, String objName) { 
for (MTable table : tables) { TableMeta metaData = new TableMeta( table.getDatabase().getName(), table.getTableName(), table.getTableType()); + metaData.setCatName(catName); metaData.setComments(table.getParameters().get("comment")); metas.add(metaData); } diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/PartitionIterable.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/PartitionIterable.java new file mode 100644 index 0000000000..e2e614b539 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/PartitionIterable.java @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; + +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.utils.MetastoreException; + + +/** + * PartitionIterable - effectively a lazy Iterable + * Sometimes, we have a need for iterating through a list of partitions, + * but the list of partitions can be too big to fetch as a single object. + * Thus, the goal of PartitionIterable is to act as an Iterable + * while lazily fetching each relevant partition, one after the other as + * independent metadata calls. + * It is very likely that any calls to PartitionIterable are going to result + * in a large number of calls, so use sparingly only when the memory cost + * of fetching all the partitions in one shot is too prohibitive. + * This is still pretty costly in that it would retain a list of partition + * names, but that should be far less expensive than the entire partition + * objects. + * Note that remove() is an illegal call on this, and will result in an + * IllegalStateException. 
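// Illustration (hedged sketch): the intended read-only usage, mirroring how
// HiveMetaStoreChecker.checkTable() consumes this iterable. Assumes an IMetaStoreClient "msc",
// a Table "table" and a batch size of 500 (all placeholders); "process" is a hypothetical consumer.
PartitionIterable parts = new PartitionIterable(msc, table, 500);
for (Partition partition : parts) {
  // Each batch of up to 500 Partition objects is fetched lazily via getPartitionsByNames();
  // only the full list of partition names is held in memory up front.
  process(partition); // hypothetical consumer
}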
+ */ +public class PartitionIterable implements Iterable { + + @Override + public Iterator iterator() { + return new Iterator() { + + private boolean initialized = false; + private Iterator ptnsIterator = null; + + private Iterator partitionNamesIter = null; + private Iterator batchIter = null; + + private void initialize() { + if (!initialized) { + if (currType == Type.LIST_PROVIDED) { + ptnsIterator = ptnsProvided.iterator(); + } else { + partitionNamesIter = partitionNames.iterator(); + } + initialized = true; + } + } + + @Override + public boolean hasNext() { + initialize(); + if (currType == Type.LIST_PROVIDED) { + return ptnsIterator.hasNext(); + } else { + return ((batchIter != null) && batchIter.hasNext()) || partitionNamesIter.hasNext(); + } + } + + @Override + public Partition next() { + initialize(); + if (currType == Type.LIST_PROVIDED) { + return ptnsIterator.next(); + } + + if ((batchIter == null) || !batchIter.hasNext()) { + getNextBatch(); + } + + return batchIter.next(); + } + + private void getNextBatch() { + int batch_counter = 0; + List nameBatch = new ArrayList(); + while (batch_counter < batch_size && partitionNamesIter.hasNext()) { + nameBatch.add(partitionNamesIter.next()); + batch_counter++; + } + try { + batchIter = + msc.getPartitionsByNames(table.getCatName(), table.getDbName(), table.getTableName(), nameBatch).iterator(); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + @Override + public void remove() { + throw new IllegalStateException( + "PartitionIterable is a read-only iterable and remove() is unsupported"); + } + }; + } + + enum Type { + LIST_PROVIDED, // Where a List ptnsProvided = null; + + // used for LAZY_FETCH_PARTITIONS cases + private IMetaStoreClient msc = null; // Assumes one instance of this + single-threaded compilation for each query. + private Table table = null; + private List partitionNames = null; + private int batch_size; + + /** + * Dummy constructor, which simply acts as an iterator on an already-present + * list of partitions, allows for easy drop-in replacement for other methods + * that already have a List + */ + public PartitionIterable(Collection ptnsProvided) { + this.currType = Type.LIST_PROVIDED; + this.ptnsProvided = ptnsProvided; + } + + /** + * Primary constructor that fetches all partitions in a given table, given + * a Hive object and a table object, and a partial partition spec. + */ + public PartitionIterable(IMetaStoreClient msc, Table table, int batch_size) throws MetastoreException { + this.currType = Type.LAZY_FETCH_PARTITIONS; + this.msc = msc; + this.table = table; + this.batch_size = batch_size; + partitionNames = getPartitionNames(msc, table.getCatName(), table.getDbName(), table.getTableName(), (short) -1); + } + + public List getPartitionNames(IMetaStoreClient msc, String catName, String dbName, String tblName, short max) + throws MetastoreException { + try { + return msc.listPartitionNames(catName, dbName, tblName, max); + } catch (Exception e) { + throw new MetastoreException(e); + } + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java new file mode 100644 index 0000000000..901bf80a64 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/PartitionManagementTask.java @@ -0,0 +1,235 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.metastore; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.TableMeta; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.conf.TimeValidator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; +import com.google.common.util.concurrent.ThreadFactoryBuilder; + +/** + * Partition management task is primarily responsible for partition retention and discovery based on table properties. + * + * Partition Retention - If "partition.retention.period" table property is set with retention interval, when this + * metastore task runs periodically, it will drop partitions with age (creation time) greater than retention period. + * Dropping partitions after retention period will also delete the data in that partition. + * + * Partition Discovery - If "discover.partitions" table property is set, this metastore task monitors table location + * for newly added partition directories and create partition objects if it does not exist. Also, if partition object + * exist and if corresponding directory does not exists under table location then the partition object will be dropped. 
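// Illustration (hedged; the alter_table call and values are only an example): opting a table into
// discovery and retention via the two properties this task reads. The "7d" value assumes the usual
// metastore time-suffix format accepted by TimeValidator/convertTimeStr; "msc" is a client in scope.
Table t = msc.getTable("hive", "default", "web_logs");   // placeholder table
t.putToParameters(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true");
t.putToParameters(PartitionManagementTask.PARTITION_RETENTION_PERIOD_TBLPROPERTY, "7d");
msc.alter_table("default", "web_logs", t);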
+ * + */ +public class PartitionManagementTask implements MetastoreTaskThread { + private static final Logger LOG = LoggerFactory.getLogger(PartitionManagementTask.class); + public static final String DISCOVER_PARTITIONS_TBLPROPERTY = "discover.partitions"; + public static final String PARTITION_RETENTION_PERIOD_TBLPROPERTY = "partition.retention.period"; + private static final Lock lock = new ReentrantLock(); + // these are just for testing + private static int completedAttempts; + private static int skippedAttempts; + + private Configuration conf; + + @Override + public long runFrequency(TimeUnit unit) { + return MetastoreConf.getTimeVar(conf, MetastoreConf.ConfVars.PARTITION_MANAGEMENT_TASK_FREQUENCY, unit); + } + + @Override + public void setConf(Configuration configuration) { + // we modify conf in setupConf(), so we make a copy + conf = new Configuration(configuration); + } + + @Override + public Configuration getConf() { + return conf; + } + + @Override + public void run() { + if (lock.tryLock()) { + skippedAttempts = 0; + String qualifiedTableName = null; + IMetaStoreClient msc = null; + try { + msc = new HiveMetaStoreClient(conf); + List candidateTables = new ArrayList<>(); + String catalogName = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.PARTITION_MANAGEMENT_CATALOG_NAME); + String dbPattern = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.PARTITION_MANAGEMENT_DATABASE_PATTERN); + String tablePattern = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.PARTITION_MANAGEMENT_TABLE_PATTERN); + String tableTypes = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.PARTITION_MANAGEMENT_TABLE_TYPES); + Set tableTypesSet = new HashSet<>(); + List tableTypesList; + // if tableTypes is empty, then a list with single empty string has to specified to scan no tables. + // specifying empty here is equivalent to disabling the partition discovery altogether as it scans no tables. + if (tableTypes.isEmpty()) { + tableTypesList = Lists.newArrayList(""); + } else { + for (String type : tableTypes.split(",")) { + try { + tableTypesSet.add(TableType.valueOf(type.trim().toUpperCase()).name()); + } catch (IllegalArgumentException e) { + // ignore + LOG.warn("Unknown table type: {}", type); + } + } + tableTypesList = Lists.newArrayList(tableTypesSet); + } + List foundTableMetas = msc.getTableMeta(catalogName, dbPattern, tablePattern, tableTypesList); + LOG.info("Looking for tables using catalog: {} dbPattern: {} tablePattern: {} found: {}", catalogName, + dbPattern, tablePattern, foundTableMetas.size()); + + for (TableMeta tableMeta : foundTableMetas) { + Table table = msc.getTable(tableMeta.getCatName(), tableMeta.getDbName(), tableMeta.getTableName()); + if (table.getParameters() != null && table.getParameters().containsKey(DISCOVER_PARTITIONS_TBLPROPERTY) && + table.getParameters().get(DISCOVER_PARTITIONS_TBLPROPERTY).equalsIgnoreCase("true")) { + candidateTables.add(table); + } + } + if (candidateTables.isEmpty()) { + return; + } + + // TODO: Msck creates MetastoreClient (MSC) on its own. MSC creation is expensive. Sharing MSC also + // will not be safe unless synchronized MSC is used. Using synchronized MSC in multi-threaded context also + // defeats the purpose of thread pooled msck repair. 
+ int threadPoolSize = MetastoreConf.getIntVar(conf, + MetastoreConf.ConfVars.PARTITION_MANAGEMENT_TASK_THREAD_POOL_SIZE); + final ExecutorService executorService = Executors + .newFixedThreadPool(Math.min(candidateTables.size(), threadPoolSize), + new ThreadFactoryBuilder().setDaemon(true).setNameFormat("PartitionDiscoveryTask-%d").build()); + CountDownLatch countDownLatch = new CountDownLatch(candidateTables.size()); + LOG.info("Found {} candidate tables for partition discovery", candidateTables.size()); + setupMsckConf(); + for (Table table : candidateTables) { + qualifiedTableName = Warehouse.getCatalogQualifiedTableName(table); + long retentionSeconds = getRetentionPeriodInSeconds(table); + LOG.info("Running partition discovery for table {} retentionPeriod: {}s", qualifiedTableName, + retentionSeconds); + // this always runs in 'sync' mode where partitions can be added and dropped + MsckInfo msckInfo = new MsckInfo(table.getCatName(), table.getDbName(), table.getTableName(), + null, null, true, true, true, retentionSeconds); + executorService.submit(new MsckThread(msckInfo, conf, qualifiedTableName, countDownLatch)); + } + countDownLatch.await(); + executorService.shutdownNow(); + } catch (Exception e) { + LOG.error("Exception while running partition discovery task for table: " + qualifiedTableName, e); + } finally { + if (msc != null) { + msc.close(); + } + lock.unlock(); + } + completedAttempts++; + } else { + skippedAttempts++; + LOG.info("Lock is held by some other partition discovery task. Skipping this attempt..#{}", skippedAttempts); + } + } + + static long getRetentionPeriodInSeconds(final Table table) { + String retentionPeriod; + long retentionSeconds = -1; + if (table.getParameters() != null && table.getParameters().containsKey(PARTITION_RETENTION_PERIOD_TBLPROPERTY)) { + retentionPeriod = table.getParameters().get(PARTITION_RETENTION_PERIOD_TBLPROPERTY); + if (retentionPeriod.isEmpty()) { + LOG.warn("'{}' table property is defined but empty. Skipping retention period..", + PARTITION_RETENTION_PERIOD_TBLPROPERTY); + } else { + try { + TimeValidator timeValidator = new TimeValidator(TimeUnit.SECONDS); + timeValidator.validate(retentionPeriod); + retentionSeconds = MetastoreConf.convertTimeStr(retentionPeriod, TimeUnit.SECONDS, TimeUnit.SECONDS); + } catch (IllegalArgumentException e) { + LOG.warn("'{}' retentionPeriod value is invalid. Skipping retention period..", retentionPeriod); + // will return -1 + } + } + } + return retentionSeconds; + } + + private void setupMsckConf() { + // if invalid partition directory appears, we just skip and move on. We don't want partition management to throw + // when invalid path is encountered as these are background threads. We just want to skip and move on. Users will + // have to fix the invalid paths via external means. + conf.set(MetastoreConf.ConfVars.MSCK_PATH_VALIDATION.getVarname(), "skip"); + // since msck runs in thread pool and each of them create their own metastore client, we don't want explosion of + // connections to metastore for embedded mode. Also we don't need too many db connections anyway. 
+ conf.setInt(MetastoreConf.ConfVars.CONNECTION_POOLING_MAX_CONNECTIONS.getVarname(), 2); + } + + private static class MsckThread implements Runnable { + private MsckInfo msckInfo; + private Configuration conf; + private String qualifiedTableName; + private CountDownLatch countDownLatch; + + MsckThread(MsckInfo msckInfo, Configuration conf, String qualifiedTableName, CountDownLatch countDownLatch) { + this.msckInfo = msckInfo; + this.conf = conf; + this.qualifiedTableName = qualifiedTableName; + this.countDownLatch = countDownLatch; + } + + @Override + public void run() { + try { + Msck msck = new Msck( true, true); + msck.init(conf); + msck.repair(msckInfo); + } catch (Exception e) { + LOG.error("Exception while running partition discovery task for table: " + qualifiedTableName, e); + } finally { + // there is no recovery from exception, so we always count down and retry in next attempt + countDownLatch.countDown(); + } + } + } + + @VisibleForTesting + public static int getSkippedAttempts() { + return skippedAttempts; + } + + @VisibleForTesting + public static int getCompletedAttempts() { + return completedAttempts; + } +} diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java index da5a71cc64..e466d94c37 100755 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java @@ -406,7 +406,7 @@ public boolean isWritable(Path path) throws IOException { } } - private static String escapePathName(String path) { + public static String escapePathName(String path) { return FileUtils.escapePathName(path); } diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java index 46a6d532b6..ee706a2328 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.metastore.HiveAlterHandler; import org.apache.hadoop.hive.metastore.MaterializationsRebuildLockCleanerTask; import org.apache.hadoop.hive.metastore.MetastoreTaskThread; +import org.apache.hadoop.hive.metastore.PartitionManagementTask; import org.apache.hadoop.hive.metastore.RuntimeStatsCleanerTask; import org.apache.hadoop.hive.metastore.events.EventCleanerTask; import org.apache.hadoop.hive.metastore.security.MetastoreDelegationTokenManager; @@ -611,6 +612,58 @@ public static ConfVars getMetaConf(String name) { METRICS_REPORTERS("metastore.metrics.reporters", "metastore.metrics.reporters", "json,jmx", new StringSetValidator("json", "jmx", "console", "hadoop"), "A comma separated list of metrics reporters to start"), + MSCK_PATH_VALIDATION("msck.path.validation", "hive.msck.path.validation", "throw", + new StringSetValidator("throw", "skip", "ignore"), "The approach msck should take with HDFS " + + "directories that are partition-like but contain unsupported characters. 
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
index da5a71cc64..e466d94c37 100755
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
@@ -406,7 +406,7 @@ public boolean isWritable(Path path) throws IOException {
     }
   }
 
-  private static String escapePathName(String path) {
+  public static String escapePathName(String path) {
     return FileUtils.escapePathName(path);
   }
 
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 46a6d532b6..ee706a2328 100644
--- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -23,6 +23,7 @@
 import org.apache.hadoop.hive.metastore.HiveAlterHandler;
 import org.apache.hadoop.hive.metastore.MaterializationsRebuildLockCleanerTask;
 import org.apache.hadoop.hive.metastore.MetastoreTaskThread;
+import org.apache.hadoop.hive.metastore.PartitionManagementTask;
 import org.apache.hadoop.hive.metastore.RuntimeStatsCleanerTask;
 import org.apache.hadoop.hive.metastore.events.EventCleanerTask;
 import org.apache.hadoop.hive.metastore.security.MetastoreDelegationTokenManager;
@@ -611,6 +612,58 @@ public static ConfVars getMetaConf(String name) {
     METRICS_REPORTERS("metastore.metrics.reporters", "metastore.metrics.reporters", "json,jmx",
         new StringSetValidator("json", "jmx", "console", "hadoop"),
         "A comma separated list of metrics reporters to start"),
+    MSCK_PATH_VALIDATION("msck.path.validation", "hive.msck.path.validation", "throw",
+        new StringSetValidator("throw", "skip", "ignore"), "The approach msck should take with HDFS " +
+        "directories that are partition-like but contain unsupported characters. 'throw' (an " +
+        "exception) is the default; 'skip' will skip the invalid directories and still repair the" +
+        " others; 'ignore' will skip the validation (legacy behavior, causes bugs in many cases)"),
+    MSCK_REPAIR_BATCH_SIZE("msck.repair.batch.size",
+        "hive.msck.repair.batch.size", 3000,
+        "Batch size for the msck repair command. If the value is greater than zero,\n " +
+        "it will execute batch wise with the configured batch size. In case of errors while\n" +
+        "adding unknown partitions the batch size is automatically reduced by half in the subsequent\n" +
+        "retry attempt. The default value is 3000 which means it will execute in the batches of 3000."),
+    MSCK_REPAIR_BATCH_MAX_RETRIES("msck.repair.batch.max.retries", "hive.msck.repair.batch.max.retries", 4,
+        "Maximum number of retries for the msck repair command when adding unknown partitions.\n " +
+        "If the value is greater than zero it will retry adding unknown partitions until the maximum\n" +
+        "number of attempts is reached or batch size is reduced to 0, whichever is earlier.\n" +
+        "In each retry attempt it will reduce the batch size by a factor of 2 until it reaches zero.\n" +
+        "If the value is set to zero it will retry until the batch size becomes zero as described above."),
+    MSCK_REPAIR_ENABLE_PARTITION_RETENTION("msck.repair.enable.partition.retention",
+        "msck.repair.enable.partition.retention", false,
+        "If the 'partition.retention.period' table property is set, this flag determines whether the MSCK REPAIR\n" +
+        "command should handle partition retention. If enabled, and if a specific partition's age exceeds the\n" +
+        "retention period, the partition will be dropped along with its data"),
+
+    // Partition management task params
+    PARTITION_MANAGEMENT_TASK_FREQUENCY("metastore.partition.management.task.frequency",
+        "metastore.partition.management.task.frequency",
+        300, TimeUnit.SECONDS, "Frequency at which the timer task runs to do automatic partition management for tables\n" +
+        "with table property 'discover.partitions'='true'. Partition management includes two pieces: one is partition\n" +
+        "discovery and the other is partition retention. When 'discover.partitions'='true' is set, partition\n" +
+        "management will look for partitions in the table location and add partition objects for them in the metastore.\n" +
+        "Similarly, if a partition object exists in the metastore but its location does not exist, the partition object\n" +
+        "will be dropped. The second piece of partition management is the retention period. When 'discover.partitions'\n" +
+        "is set to true and the 'partition.retention.period' table property is defined, partitions that are older\n" +
+        "than the specified retention period will be automatically dropped from the metastore along with the data."),
+    PARTITION_MANAGEMENT_TABLE_TYPES("metastore.partition.management.table.types",
+        "metastore.partition.management.table.types", "MANAGED_TABLE,EXTERNAL_TABLE",
+        "Comma separated list of table types to use for partition management"),
+    PARTITION_MANAGEMENT_TASK_THREAD_POOL_SIZE("metastore.partition.management.task.thread.pool.size",
+        "metastore.partition.management.task.thread.pool.size", 5,
+        "Partition management uses a thread pool to which tasks are submitted for discovering and retaining the\n" +
+        "partitions. 
This determines the size of the thread pool."), + PARTITION_MANAGEMENT_CATALOG_NAME("metastore.partition.management.catalog.name", + "metastore.partition.management.catalog.name", "hive", + "Automatic partition management will look for tables under the specified catalog name"), + PARTITION_MANAGEMENT_DATABASE_PATTERN("metastore.partition.management.database.pattern", + "metastore.partition.management.database.pattern", "*", + "Automatic partition management will look for tables using the specified database pattern"), + PARTITION_MANAGEMENT_TABLE_PATTERN("metastore.partition.management.table.pattern", + "metastore.partition.management.table.pattern", "*", + "Automatic partition management will look for tables using the specified table pattern"), + MULTITHREADED("javax.jdo.option.Multithreaded", "javax.jdo.option.Multithreaded", true, "Set this to true if multiple threads access metastore through JDO concurrently."), MAX_OPEN_TXNS("metastore.max.open.txns", "hive.max.open.txns", 100000, @@ -768,7 +821,8 @@ public static ConfVars getMetaConf(String name) { AcidOpenTxnsCounterService.class.getName() + "," + AcidCompactionHistoryService.class.getName() + "," + AcidWriteSetService.class.getName() + "," + - MaterializationsRebuildLockCleanerTask.class.getName(), + MaterializationsRebuildLockCleanerTask.class.getName() + "," + + PartitionManagementTask.class.getName(), "Command separated list of tasks that will be started in separate threads. These will be" + " started only when the metastore is running as a separate service. They must " + "implement " + MetastoreTaskThread.class.getName()), diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java index 41f75da338..f6e1a6c391 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreUtils.java @@ -17,25 +17,59 @@ */ package org.apache.hadoop.hive.metastore.utils; -import org.apache.hadoop.hive.common.TableName; -import org.apache.hadoop.hive.metastore.api.WMPoolSchedulingPolicy; +import java.io.File; +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.math.BigDecimal; +import java.math.BigInteger; +import java.net.InetSocketAddress; +import java.net.ServerSocket; +import java.net.Socket; +import java.net.URL; +import java.net.URLClassLoader; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.Properties; +import java.util.Set; +import java.util.SortedMap; +import java.util.SortedSet; +import java.util.TimeZone; +import java.util.TreeMap; +import java.util.TreeSet; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.regex.Matcher; +import java.util.regex.Pattern; -import com.google.common.base.Joiner; -import com.google.common.base.Predicates; -import 
com.google.common.collect.Lists; -import com.google.common.collect.Maps; -import com.google.common.util.concurrent.ThreadFactoryBuilder; +import javax.annotation.Nullable; -import org.apache.commons.collections.ListUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.collections.ListUtils; import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.StatsSetupConst; +import org.apache.hadoop.hive.common.TableName; import org.apache.hadoop.hive.metastore.ColumnType; import org.apache.hadoop.hive.metastore.HiveMetaStore; +import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.Warehouse; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; @@ -46,12 +80,14 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.metastore.api.InvalidObjectException; import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.Order; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.SerDeInfo; import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.WMPoolSchedulingPolicy; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator; import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregatorFactory; @@ -67,45 +103,12 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.annotation.Nullable; - -import java.io.File; -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.math.BigDecimal; -import java.math.BigInteger; -import java.net.InetSocketAddress; -import java.net.ServerSocket; -import java.net.Socket; -import java.net.URL; -import java.net.URLClassLoader; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.security.MessageDigest; -import java.text.DateFormat; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Properties; -import java.util.Map.Entry; -import java.util.SortedMap; -import java.util.SortedSet; -import java.util.StringJoiner; -import java.util.TimeZone; -import java.util.TreeMap; -import java.util.TreeSet; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Joiner; +import com.google.common.base.Predicates; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import 
com.google.common.util.concurrent.ThreadFactoryBuilder; public class MetaStoreUtils { /** A fixed date format to be used for hive partition column values. */ @@ -863,7 +866,7 @@ public static boolean columnsIncludedByNameType(List oldCols, /** Duplicates AcidUtils; used in a couple places in metastore. */ public static boolean isTransactionalTable(Map params) { String transactionalProp = params.get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL); - return (transactionalProp != null && "true".equalsIgnoreCase(transactionalProp)); + return "true".equalsIgnoreCase(transactionalProp); } /** Duplicates AcidUtils; used in a couple places in metastore. */ @@ -1839,4 +1842,161 @@ public static String getDefaultCatalog(Configuration conf) { return catName; } + // Some util methods from Hive.java, this is copied so as to avoid circular dependency with hive ql + public static Path getPath(Table table) { + String location = table.getSd().getLocation(); + if (location == null) { + return null; + } + return new Path(location); + } + + public static List getAllPartitionsOf(IMetaStoreClient msc, Table table) throws MetastoreException { + try { + return msc.listPartitions(table.getCatName(), table.getDbName(), table.getTableName(), (short)-1); + } catch (Exception e) { + throw new MetastoreException(e); + } + } + + public static boolean isPartitioned(Table table) { + if (getPartCols(table) == null) { + return false; + } + return (getPartCols(table).size() != 0); + } + + public static List getPartCols(Table table) { + List partKeys = table.getPartitionKeys(); + if (partKeys == null) { + partKeys = new ArrayList<>(); + table.setPartitionKeys(partKeys); + } + return partKeys; + } + + public static List getPartColNames(Table table) { + List partColNames = new ArrayList<>(); + for (FieldSchema key : getPartCols(table)) { + partColNames.add(key.getName()); + } + return partColNames; + } + + public static Path getDataLocation(Table table, Partition partition) { + if (isPartitioned(table)) { + if (partition.getSd() == null) { + return null; + } else { + return new Path(partition.getSd().getLocation()); + } + } else { + if (table.getSd() == null) { + return null; + } + else { + return getPath(table); + } + } + } + + public static String getPartitionName(Table table, Partition partition) { + try { + return Warehouse.makePartName(getPartCols(table), partition.getValues()); + } catch (MetaException e) { + throw new RuntimeException(e); + } + } + + public static Map getPartitionSpec(Table table, Partition partition) { + return Warehouse.makeSpecFromValues(getPartCols(table), partition.getValues()); + } + + public static Partition getPartition(IMetaStoreClient msc, Table tbl, Map partSpec) throws MetastoreException { + List pvals = new ArrayList(); + for (FieldSchema field : getPartCols(tbl)) { + String val = partSpec.get(field.getName()); + pvals.add(val); + } + Partition tpart = null; + try { + tpart = msc.getPartition(tbl.getCatName(), tbl.getDbName(), tbl.getTableName(), pvals); + } catch (NoSuchObjectException nsoe) { + // this means no partition exists for the given partition + // key value pairs - thrift cannot handle null return values, hence + // getPartition() throws NoSuchObjectException to indicate null partition + } catch (Exception e) { + LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); + throw new MetastoreException(e); + } + + return tpart; + } + + + /** + * Get the partition name from the path. + * + * @param tablePath + * Path of the table. 
+ * @param partitionPath + * Path of the partition. + * @param partCols + * Set of partition columns from table definition + * @return Partition name, for example partitiondate=2008-01-01 + */ + public static String getPartitionName(Path tablePath, Path partitionPath, Set partCols) { + String result = null; + Path currPath = partitionPath; + LOG.debug("tablePath:" + tablePath + ", partCols: " + partCols); + + while (currPath != null && !tablePath.equals(currPath)) { + // format: partition=p_val + // Add only when table partition colName matches + String[] parts = currPath.getName().split("="); + if (parts.length > 0) { + if (parts.length != 2) { + LOG.warn(currPath.getName() + " is not a valid partition name"); + return result; + } + + String partitionName = parts[0]; + if (partCols.contains(partitionName)) { + if (result == null) { + result = currPath.getName(); + } else { + result = currPath.getName() + Path.SEPARATOR + result; + } + } + } + currPath = currPath.getParent(); + LOG.debug("currPath=" + currPath); + } + return result; + } + + public static Partition createMetaPartitionObject(Table tbl, Map partSpec, Path location) + throws MetastoreException { + List pvals = new ArrayList(); + for (FieldSchema field : getPartCols(tbl)) { + String val = partSpec.get(field.getName()); + if (val == null || val.isEmpty()) { + throw new MetastoreException("partition spec is invalid; field " + + field.getName() + " does not exist or is empty"); + } + pvals.add(val); + } + + Partition tpart = new Partition(); + tpart.setCatName(tbl.getCatName()); + tpart.setDbName(tbl.getDbName()); + tpart.setTableName(tbl.getTableName()); + tpart.setValues(pvals); + + if (!MetaStoreUtils.isView(tbl)) { + tpart.setSd(tbl.getSd().deepCopy()); + tpart.getSd().setLocation((location != null) ? location.toString() : null); + } + return tpart; + } } diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetastoreException.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetastoreException.java new file mode 100644 index 0000000000..fb94ed8029 --- /dev/null +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/MetastoreException.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.metastore.utils;
+
+public class MetastoreException extends Exception {
+  public MetastoreException() {
+    super();
+  }
+
+  public MetastoreException(String message) {
+    super(message);
+  }
+
+  public MetastoreException(Throwable cause) {
+    super(cause);
+  }
+
+  public MetastoreException(String message, Throwable cause) {
+    super(message, cause);
+  }
+}
\ No newline at end of file
diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/RetryUtilities.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/RetryUtilities.java
new file mode 100644
index 0000000000..22513b9b91
--- /dev/null
+++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/utils/RetryUtilities.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.utils;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class RetryUtilities {
+  public static class RetryException extends Exception {
+    private static final long serialVersionUID = 1L;
+
+    public RetryException(Exception ex) {
+      super(ex);
+    }
+
+    public RetryException(String msg) {
+      super(msg);
+    }
+  }
+
+  /**
+   * Interface used to create an ExponentialBackOffRetry policy.
+   */
+  public interface ExponentialBackOffRetry<T> {
+    /**
+     * Implementations provide the actual work which needs to be done for a given batch size.
+     * @param batchSize The batch size for the work which needs to be executed
+     * @return the result of executing the work with the given batch size
+     * @throws Exception if the batch could not be processed
+     */
+    T execute(int batchSize) throws Exception;
+  }
+
+  /**
+   * This class is a base implementation of a simple exponential back-off retry policy. The batch size
+   * and decaying factor are provided with the constructor. It reduces the batch size by dividing
+   * it by the decaying factor every time there is an exception in the execute method.
+   */
+  public static abstract class ExponentiallyDecayingBatchWork<T>
+      implements ExponentialBackOffRetry<T> {
+    private int batchSize;
+    private final int decayingFactor;
+    private int maxRetries;
+    private static final Logger LOG = LoggerFactory.getLogger(ExponentiallyDecayingBatchWork.class);
+
+    public ExponentiallyDecayingBatchWork(int batchSize, int reducingFactor, int maxRetries) {
+      if (batchSize <= 0) {
+        throw new IllegalArgumentException(String.format(
+            "Invalid batch size %d provided. Batch size must be greater than 0", batchSize));
+      }
+      this.batchSize = batchSize;
+      if (reducingFactor <= 1) {
+        throw new IllegalArgumentException(String.format(
+            "Invalid decaying factor %d provided. Decaying factor must be greater than 1",
+            reducingFactor));
+      }
+      if (maxRetries < 0) {
+        throw new IllegalArgumentException(String.format(
+            "Invalid number of maximum retries %d provided. It must be a non-negative integer value",
+            maxRetries));
+      }
+      // if maxRetries is 0 the work is retried until the batch size decays to zero
+      this.maxRetries = maxRetries;
+      this.decayingFactor = reducingFactor;
+    }
+
+    public T run() throws Exception {
+      int attempt = 0;
+      while (true) {
+        int size = getNextBatchSize();
+        if (size == 0) {
+          throw new RetryException("Batch size reduced to zero");
+        }
+        try {
+          return execute(size);
+        } catch (Exception ex) {
+          LOG.warn(String.format("Exception thrown while processing using a batch size %d", size), ex);
+          attempt++;
+          if (attempt == maxRetries) {
+            throw new RetryException(String.format("Maximum number of retry attempts %d exhausted", maxRetries));
+          }
+        }
+      }
+    }
+
+    private int getNextBatchSize() {
+      int ret = batchSize;
+      batchSize /= decayingFactor;
+      return ret;
+    }
+  }
+}
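For illustration, this is how a caller is expected to drive ExponentiallyDecayingBatchWork. The sketch is not part of this patch: the numbers simply mirror the msck defaults above (batch size 3000, halved on failure, at most 4 retries), addPartitionBatch is a hypothetical callback, and the enclosing method is assumed to declare throws Exception.

    // Sketch only: processes work in shrinking batches until a batch succeeds,
    // the retries are exhausted, or the batch size decays to zero.
    Integer added = new RetryUtilities.ExponentiallyDecayingBatchWork<Integer>(3000, 2, 4) {
      @Override
      public Integer execute(int size) throws Exception {
        return addPartitionBatch(size); // hypothetical: add up to 'size' partitions, throwing on failure
      }
    }.run();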
diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/NonCatCallsWithCatalog.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/NonCatCallsWithCatalog.java
index f750ca2a9b..377a550188 100644
--- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/NonCatCallsWithCatalog.java
+++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/NonCatCallsWithCatalog.java
@@ -482,7 +482,9 @@ public void getTableMeta() throws TException {
           .build(conf);
       table.unsetCatName();
       client.createTable(table);
-      expected.add(new TableMeta(dbName, tableNames[i], TableType.MANAGED_TABLE.name()));
+      TableMeta tableMeta = new TableMeta(dbName, tableNames[i], TableType.MANAGED_TABLE.name());
+      tableMeta.setCatName(expectedCatalog());
+      expected.add(tableMeta);
     }
 
     List<String> types = Collections.singletonList(TableType.MANAGED_TABLE.name());
diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogOldClient.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogOldClient.java
index fc996c8c71..b3690ecbb7 100644
--- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogOldClient.java
+++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestCatalogOldClient.java
@@ -17,10 +17,10 @@
  */
 package org.apache.hadoop.hive.metastore;
 
-import org.apache.hadoop.hive.metastore.api.MetaException;
-
 import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME;
 
+import org.apache.hadoop.hive.metastore.api.MetaException;
+
 /**
  * This tests calls with an older client, to make sure that if the client supplies no catalog
  * information the server still does the right thing.  I assumes the default catalog
diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java
new file mode 100644
index 0000000000..059c1669a1
--- /dev/null
+++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/TestPartitionManagement.java
@@ -0,0 +1,581 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.metastore; + +import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_CATALOG_NAME; +import static org.apache.hadoop.hive.metastore.Warehouse.DEFAULT_DATABASE_NAME; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; +import org.apache.hadoop.hive.metastore.api.Catalog; +import org.apache.hadoop.hive.metastore.api.Database; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.Table; +import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; +import org.apache.hadoop.hive.metastore.client.builder.CatalogBuilder; +import org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder; +import org.apache.hadoop.hive.metastore.client.builder.PartitionBuilder; +import org.apache.hadoop.hive.metastore.client.builder.TableBuilder; +import org.apache.hadoop.hive.metastore.conf.MetastoreConf; +import org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge; +import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; +import org.apache.thrift.TException; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +import com.google.common.collect.Lists; + +@Category(MetastoreUnitTest.class) +public class TestPartitionManagement { + private IMetaStoreClient client; + private Configuration conf; + + @Before + public void setUp() throws Exception { + conf = MetastoreConf.newMetastoreConf(); + conf.setClass(MetastoreConf.ConfVars.EXPRESSION_PROXY_CLASS.getVarname(), + MsckPartitionExpressionProxy.class, PartitionExpressionProxy.class); + MetaStoreTestUtils.setConfForStandloneMode(conf); + conf.setBoolean(MetastoreConf.ConfVars.MULTITHREADED.getVarname(), false); + MetaStoreTestUtils.startMetaStoreWithRetry(HadoopThriftAuthBridge.getBridge(), conf); + TxnDbUtil.setConfValues(conf); + TxnDbUtil.prepDb(conf); + client = new HiveMetaStoreClient(conf); + } + + @After + public void tearDown() throws Exception { + if (client != null) { + // Drop any left over catalogs + List catalogs = client.getCatalogs(); + for (String catName : catalogs) { + if (!catName.equalsIgnoreCase(DEFAULT_CATALOG_NAME)) { + // First drop any databases in catalog + List databases = client.getAllDatabases(catName); + for (String db : databases) { + client.dropDatabase(catName, db, true, false, true); + } + client.dropCatalog(catName); + } else { + List databases = client.getAllDatabases(catName); + for (String db : databases) { + if (!db.equalsIgnoreCase(Warehouse.DEFAULT_DATABASE_NAME)) { + client.dropDatabase(catName, db, true, false, true); + } + } + } + } + } + try { + if (client != 
null) { + client.close(); + } + } finally { + client = null; + } + } + + private Map buildAllColumns() { + Map colMap = new HashMap<>(6); + Column[] cols = {new Column("b", "binary"), new Column("bo", "boolean"), + new Column("d", "date"), new Column("do", "double"), new Column("l", "bigint"), + new Column("s", "string")}; + for (Column c : cols) { + colMap.put(c.colName, c); + } + return colMap; + } + + private List createMetadata(String catName, String dbName, String tableName, + List partKeys, List partKeyTypes, List> partVals, + Map colMap, boolean isOrc) + throws TException { + if (!DEFAULT_CATALOG_NAME.equals(catName)) { + Catalog cat = new CatalogBuilder() + .setName(catName) + .setLocation(MetaStoreTestUtils.getTestWarehouseDir(catName)) + .build(); + client.createCatalog(cat); + } + + Database db; + if (!DEFAULT_DATABASE_NAME.equals(dbName)) { + DatabaseBuilder dbBuilder = new DatabaseBuilder() + .setName(dbName); + dbBuilder.setCatalogName(catName); + db = dbBuilder.create(client, conf); + } else { + db = client.getDatabase(DEFAULT_CATALOG_NAME, DEFAULT_DATABASE_NAME); + } + + TableBuilder tb = new TableBuilder() + .inDb(db) + .setTableName(tableName); + + if (isOrc) { + tb.setInputFormat("org.apache.hadoop.hive.ql.io.orc.OrcInputFormat") + .setOutputFormat("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"); + } + + for (Column col : colMap.values()) { + tb.addCol(col.colName, col.colType); + } + + if (partKeys != null) { + if (partKeyTypes == null) { + throw new IllegalArgumentException("partKeyTypes cannot be null when partKeys is non-null"); + } + if (partKeys.size() != partKeyTypes.size()) { + throw new IllegalArgumentException("partKeys and partKeyTypes size should be same"); + } + if (partVals.isEmpty()) { + throw new IllegalArgumentException("partVals cannot be empty for patitioned table"); + } + for (int i = 0; i < partKeys.size(); i++) { + tb.addPartCol(partKeys.get(i), partKeyTypes.get(i)); + } + } + Table table = tb.create(client, conf); + + if (partKeys != null) { + for (List partVal : partVals) { + new PartitionBuilder() + .inTable(table) + .setValues(partVal) + .addToTable(client, conf); + } + } + + List partNames = new ArrayList<>(); + if (partKeys != null) { + for (int i = 0; i < partKeys.size(); i++) { + String partKey = partKeys.get(i); + for (String partVal : partVals.get(i)) { + String partName = partKey + "=" + partVal; + partNames.add(partName); + } + } + } + client.flushCache(); + return partNames; + } + + @Test + public void testPartitionDiscoveryDisabledByDefault() throws TException, IOException { + String dbName = "db1"; + String tableName = "tbl1"; + Map colMap = buildAllColumns(); + List partKeys = Lists.newArrayList("state", "dt"); + List partKeyTypes = Lists.newArrayList("string", "date"); + List> partVals = Lists.newArrayList( + Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "1990-01-01"), + Lists.newArrayList("CA", "1986-04-28"), + Lists.newArrayList("MN", "2018-11-31")); + createMetadata(DEFAULT_CATALOG_NAME, dbName, tableName, partKeys, partKeyTypes, partVals, colMap, false); + Table table = client.getTable(dbName, tableName); + List partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + String tableLocation = table.getSd().getLocation(); + URI location = URI.create(tableLocation); + Path tablePath = new Path(location); + FileSystem fs = FileSystem.get(location, conf); + fs.mkdirs(new Path(tablePath, "state=WA/dt=2018-12-01")); + fs.mkdirs(new Path(tablePath, "state=UT/dt=2018-12-02")); + 
assertEquals(5, fs.listStatus(tablePath).length); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + + // partition discovery is not enabled via table property, so nothing should change on this table + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + + // table property is set to false, so no change expected + table.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "false"); + client.alter_table(dbName, tableName, table); + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + } + + @Test + public void testPartitionDiscoveryEnabledBothTableTypes() throws TException, IOException { + String dbName = "db2"; + String tableName = "tbl2"; + Map colMap = buildAllColumns(); + List partKeys = Lists.newArrayList("state", "dt"); + List partKeyTypes = Lists.newArrayList("string", "date"); + List> partVals = Lists.newArrayList( + Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "1990-01-01"), + Lists.newArrayList("CA", "1986-04-28"), + Lists.newArrayList("MN", "2018-11-31")); + createMetadata(DEFAULT_CATALOG_NAME, dbName, tableName, partKeys, partKeyTypes, partVals, colMap, false); + Table table = client.getTable(dbName, tableName); + List partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + String tableLocation = table.getSd().getLocation(); + URI location = URI.create(tableLocation); + Path tablePath = new Path(location); + FileSystem fs = FileSystem.get(location, conf); + Path newPart1 = new Path(tablePath, "state=WA/dt=2018-12-01"); + Path newPart2 = new Path(tablePath, "state=UT/dt=2018-12-02"); + fs.mkdirs(newPart1); + fs.mkdirs(newPart2); + assertEquals(5, fs.listStatus(tablePath).length); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + + // table property is set to true, we expect 5 partitions + table.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true"); + client.alter_table(dbName, tableName, table); + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(5, partitions.size()); + + // change table type to external, delete a partition directory and make sure partition discovery works + table.getParameters().put("EXTERNAL", "true"); + table.setTableType(TableType.EXTERNAL_TABLE.name()); + client.alter_table(dbName, tableName, table); + boolean deleted = fs.delete(newPart1.getParent(), true); + assertTrue(deleted); + assertEquals(4, fs.listStatus(tablePath).length); + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(4, partitions.size()); + + // remove external tables from partition discovery and expect no changes even after partition is deleted + conf.set(MetastoreConf.ConfVars.PARTITION_MANAGEMENT_TABLE_TYPES.getVarname(), TableType.MANAGED_TABLE.name()); + deleted = fs.delete(newPart2.getParent(), true); + assertTrue(deleted); + assertEquals(3, fs.listStatus(tablePath).length); + // this doesn't remove partition because table is still external and we have remove external table type from + // partition discovery + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(4, partitions.size()); + + 
// no table types specified, msck will not select any tables + conf.set(MetastoreConf.ConfVars.PARTITION_MANAGEMENT_TABLE_TYPES.getVarname(), ""); + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(4, partitions.size()); + + // only EXTERNAL table type, msck should drop a partition now + conf.set(MetastoreConf.ConfVars.PARTITION_MANAGEMENT_TABLE_TYPES.getVarname(), TableType.EXTERNAL_TABLE.name()); + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + } + + @Test + public void testPartitionDiscoveryNonDefaultCatalog() throws TException, IOException { + String catName = "cat3"; + String dbName = "db3"; + String tableName = "tbl3"; + Map colMap = buildAllColumns(); + List partKeys = Lists.newArrayList("state", "dt"); + List partKeyTypes = Lists.newArrayList("string", "date"); + List> partVals = Lists.newArrayList( + Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "1990-01-01"), + Lists.newArrayList("CA", "1986-04-28"), + Lists.newArrayList("MN", "2018-11-31")); + createMetadata(catName, dbName, tableName, partKeys, partKeyTypes, partVals, colMap, false); + Table table = client.getTable(catName, dbName, tableName); + List partitions = client.listPartitions(catName, dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + String tableLocation = table.getSd().getLocation(); + URI location = URI.create(tableLocation); + Path tablePath = new Path(location); + FileSystem fs = FileSystem.get(location, conf); + Path newPart1 = new Path(tablePath, "state=WA/dt=2018-12-01"); + Path newPart2 = new Path(tablePath, "state=UT/dt=2018-12-02"); + fs.mkdirs(newPart1); + fs.mkdirs(newPart2); + assertEquals(5, fs.listStatus(tablePath).length); + table.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true"); + client.alter_table(catName, dbName, tableName, table); + // default catalog in conf is 'hive' but we are using 'cat3' as catName for this test, so msck should not fix + // anything for this one + runPartitionManagementTask(conf); + partitions = client.listPartitions(catName, dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + + // using the correct catalog name, we expect msck to fix partitions + conf.set(MetastoreConf.ConfVars.PARTITION_MANAGEMENT_CATALOG_NAME.getVarname(), catName); + runPartitionManagementTask(conf); + partitions = client.listPartitions(catName, dbName, tableName, (short) -1); + assertEquals(5, partitions.size()); + } + + @Test + public void testPartitionDiscoveryDBPattern() throws TException, IOException { + String dbName = "db4"; + String tableName = "tbl4"; + Map colMap = buildAllColumns(); + List partKeys = Lists.newArrayList("state", "dt"); + List partKeyTypes = Lists.newArrayList("string", "date"); + List> partVals = Lists.newArrayList( + Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "1990-01-01"), + Lists.newArrayList("CA", "1986-04-28"), + Lists.newArrayList("MN", "2018-11-31")); + createMetadata(DEFAULT_CATALOG_NAME, dbName, tableName, partKeys, partKeyTypes, partVals, colMap, false); + Table table = client.getTable(dbName, tableName); + List partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + String tableLocation = table.getSd().getLocation(); + URI location = URI.create(tableLocation); + Path tablePath = new Path(location); + FileSystem fs = FileSystem.get(location, conf); + Path newPart1 = new 
Path(tablePath, "state=WA/dt=2018-12-01"); + Path newPart2 = new Path(tablePath, "state=UT/dt=2018-12-02"); + fs.mkdirs(newPart1); + fs.mkdirs(newPart2); + assertEquals(5, fs.listStatus(tablePath).length); + table.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true"); + client.alter_table(dbName, tableName, table); + // no match for this db pattern, so we will see only 3 partitions + conf.set(MetastoreConf.ConfVars.PARTITION_MANAGEMENT_DATABASE_PATTERN.getVarname(), "*dbfoo*"); + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + + // matching db pattern, we will see all 5 partitions now + conf.set(MetastoreConf.ConfVars.PARTITION_MANAGEMENT_DATABASE_PATTERN.getVarname(), "*db4*"); + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(5, partitions.size()); + } + + @Test + public void testPartitionDiscoveryTablePattern() throws TException, IOException { + String dbName = "db5"; + String tableName = "tbl5"; + Map colMap = buildAllColumns(); + List partKeys = Lists.newArrayList("state", "dt"); + List partKeyTypes = Lists.newArrayList("string", "date"); + List> partVals = Lists.newArrayList( + Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "1990-01-01"), + Lists.newArrayList("CA", "1986-04-28"), + Lists.newArrayList("MN", "2018-11-31")); + createMetadata(DEFAULT_CATALOG_NAME, dbName, tableName, partKeys, partKeyTypes, partVals, colMap, false); + Table table = client.getTable(dbName, tableName); + List partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + String tableLocation = table.getSd().getLocation(); + URI location = URI.create(tableLocation); + Path tablePath = new Path(location); + FileSystem fs = FileSystem.get(location, conf); + Path newPart1 = new Path(tablePath, "state=WA/dt=2018-12-01"); + Path newPart2 = new Path(tablePath, "state=UT/dt=2018-12-02"); + fs.mkdirs(newPart1); + fs.mkdirs(newPart2); + assertEquals(5, fs.listStatus(tablePath).length); + table.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true"); + client.alter_table(dbName, tableName, table); + // no match for this table pattern, so we will see only 3 partitions + conf.set(MetastoreConf.ConfVars.PARTITION_MANAGEMENT_TABLE_PATTERN.getVarname(), "*tblfoo*"); + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + + // matching table pattern, we will see all 5 partitions now + conf.set(MetastoreConf.ConfVars.PARTITION_MANAGEMENT_TABLE_PATTERN.getVarname(), "tbl5*"); + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(5, partitions.size()); + } + + @Test + public void testPartitionDiscoveryTransactionalTable() + throws TException, IOException, InterruptedException, ExecutionException { + String dbName = "db6"; + String tableName = "tbl6"; + Map colMap = buildAllColumns(); + List partKeys = Lists.newArrayList("state", "dt"); + List partKeyTypes = Lists.newArrayList("string", "date"); + List> partVals = Lists.newArrayList( + Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "1990-01-01"), + Lists.newArrayList("CA", "1986-04-28"), + Lists.newArrayList("MN", "2018-11-31")); + createMetadata(DEFAULT_CATALOG_NAME, dbName, tableName, partKeys, partKeyTypes, partVals, colMap, true); + Table table = 
client.getTable(dbName, tableName); + List partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + String tableLocation = table.getSd().getLocation(); + URI location = URI.create(tableLocation); + Path tablePath = new Path(location); + FileSystem fs = FileSystem.get(location, conf); + Path newPart1 = new Path(tablePath, "state=WA/dt=2018-12-01"); + Path newPart2 = new Path(tablePath, "state=UT/dt=2018-12-02"); + fs.mkdirs(newPart1); + fs.mkdirs(newPart2); + assertEquals(5, fs.listStatus(tablePath).length); + table.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true"); + table.getParameters().put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true"); + table.getParameters().put(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, + TransactionalValidationListener.INSERTONLY_TRANSACTIONAL_PROPERTY); + client.alter_table(dbName, tableName, table); + + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(5, partitions.size()); + + // only one partition discovery task is running, there will be no skipped attempts + assertEquals(0, PartitionManagementTask.getSkippedAttempts()); + + // delete a partition from fs, and submit 3 tasks at the same time each of them trying to acquire X lock on the + // same table, only one of them will run other attempts will be skipped + boolean deleted = fs.delete(newPart1.getParent(), true); + assertTrue(deleted); + assertEquals(4, fs.listStatus(tablePath).length); + + // 3 tasks are submitted at the same time, only one will eventually lock the table and only one get to run at a time + // This is to simulate, skipping partition discovery task attempt when previous attempt is still incomplete + PartitionManagementTask partitionDiscoveryTask1 = new PartitionManagementTask(); + partitionDiscoveryTask1.setConf(conf); + PartitionManagementTask partitionDiscoveryTask2 = new PartitionManagementTask(); + partitionDiscoveryTask2.setConf(conf); + PartitionManagementTask partitionDiscoveryTask3 = new PartitionManagementTask(); + partitionDiscoveryTask3.setConf(conf); + List tasks = Lists + .newArrayList(partitionDiscoveryTask1, partitionDiscoveryTask2, partitionDiscoveryTask3); + ExecutorService executorService = Executors.newFixedThreadPool(3); + int successBefore = PartitionManagementTask.getCompletedAttempts(); + int skippedBefore = PartitionManagementTask.getSkippedAttempts(); + List> futures = new ArrayList<>(); + for (PartitionManagementTask task : tasks) { + futures.add(executorService.submit(task)); + } + for (Future future : futures) { + future.get(); + } + int successAfter = PartitionManagementTask.getCompletedAttempts(); + int skippedAfter = PartitionManagementTask.getSkippedAttempts(); + assertEquals(1, successAfter - successBefore); + assertEquals(2, skippedAfter - skippedBefore); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(4, partitions.size()); + } + + @Test + public void testPartitionRetention() throws TException, IOException, InterruptedException { + String dbName = "db7"; + String tableName = "tbl7"; + Map colMap = buildAllColumns(); + List partKeys = Lists.newArrayList("state", "dt"); + List partKeyTypes = Lists.newArrayList("string", "date"); + List> partVals = Lists.newArrayList( + Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "1990-01-01"), + Lists.newArrayList("CA", "1986-04-28"), + Lists.newArrayList("MN", "2018-11-31")); + createMetadata(DEFAULT_CATALOG_NAME, 
dbName, tableName, partKeys, partKeyTypes, partVals, colMap, false); + Table table = client.getTable(dbName, tableName); + List partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + String tableLocation = table.getSd().getLocation(); + URI location = URI.create(tableLocation); + Path tablePath = new Path(location); + FileSystem fs = FileSystem.get(location, conf); + Path newPart1 = new Path(tablePath, "state=WA/dt=2018-12-01"); + Path newPart2 = new Path(tablePath, "state=UT/dt=2018-12-02"); + fs.mkdirs(newPart1); + fs.mkdirs(newPart2); + assertEquals(5, fs.listStatus(tablePath).length); + table.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true"); + table.getParameters().put(PartitionManagementTask.PARTITION_RETENTION_PERIOD_TBLPROPERTY, "20000ms"); + client.alter_table(dbName, tableName, table); + + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(5, partitions.size()); + + // after 30s all partitions should have been gone + Thread.sleep(30 * 1000); + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(0, partitions.size()); + } + + @Test + public void testPartitionDiscoverySkipInvalidPath() throws TException, IOException, InterruptedException { + String dbName = "db8"; + String tableName = "tbl8"; + Map colMap = buildAllColumns(); + List partKeys = Lists.newArrayList("state", "dt"); + List partKeyTypes = Lists.newArrayList("string", "date"); + List> partVals = Lists.newArrayList( + Lists.newArrayList("__HIVE_DEFAULT_PARTITION__", "1990-01-01"), + Lists.newArrayList("CA", "1986-04-28"), + Lists.newArrayList("MN", "2018-11-31")); + createMetadata(DEFAULT_CATALOG_NAME, dbName, tableName, partKeys, partKeyTypes, partVals, colMap, false); + Table table = client.getTable(dbName, tableName); + List partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(3, partitions.size()); + String tableLocation = table.getSd().getLocation(); + URI location = URI.create(tableLocation); + Path tablePath = new Path(location); + FileSystem fs = FileSystem.get(location, conf); + Path newPart1 = new Path(tablePath, "state=WA/dt=2018-12-01"); + Path newPart2 = new Path(tablePath, "state=UT/dt="); + fs.mkdirs(newPart1); + fs.mkdirs(newPart2); + assertEquals(5, fs.listStatus(tablePath).length); + table.getParameters().put(PartitionManagementTask.DISCOVER_PARTITIONS_TBLPROPERTY, "true"); + // empty retention period basically means disabled + table.getParameters().put(PartitionManagementTask.PARTITION_RETENTION_PERIOD_TBLPROPERTY, ""); + client.alter_table(dbName, tableName, table); + + // there is one partition with invalid path which will get skipped + runPartitionManagementTask(conf); + partitions = client.listPartitions(dbName, tableName, (short) -1); + assertEquals(4, partitions.size()); + } + + private void runPartitionManagementTask(Configuration conf) { + PartitionManagementTask task = new PartitionManagementTask(); + task.setConf(conf); + task.run(); + } + + private static class Column { + private String colName; + private String colType; + + public Column(final String colName, final String colType) { + this.colName = colName; + this.colType = colType; + } + } +} diff --git a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestGetTableMeta.java 
b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestGetTableMeta.java index d8448c8783..d9866b3bbc 100644 --- a/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestGetTableMeta.java +++ b/standalone-metastore/src/test/java/org/apache/hadoop/hive/metastore/client/TestGetTableMeta.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.metastore.MetaStoreTestUtils; import org.apache.hadoop.hive.metastore.TableType; import org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest; +import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest; import org.apache.hadoop.hive.metastore.api.CreationMetadata; import org.apache.hadoop.hive.metastore.api.Catalog; import org.apache.hadoop.hive.metastore.api.Database; @@ -119,6 +120,7 @@ private void createDB(String dbName) throws TException { private Table createTable(String dbName, String tableName, TableType type) throws Exception { TableBuilder builder = new TableBuilder() + .setCatName("hive") .setDbName(dbName) .setTableName(tableName) .addCol("id", "int") @@ -149,6 +151,7 @@ private TableMeta createTestTable(String dbName, String tableName, TableType typ client.createTable(table); TableMeta tableMeta = new TableMeta(dbName, tableName, type.name()); tableMeta.setComments(comment); + tableMeta.setCatName("hive"); return tableMeta; } @@ -156,7 +159,9 @@ private TableMeta createTestTable(String dbName, String tableName, TableType typ throws Exception { Table table = createTable(dbName, tableName, type); client.createTable(table); - return new TableMeta(dbName, tableName, type.name()); + TableMeta tableMeta = new TableMeta(dbName, tableName, type.name()); + tableMeta.setCatName("hive"); + return tableMeta; } private void assertTableMetas(int[] expected, List actualTableMetas) { @@ -297,7 +302,9 @@ public void tablesInDifferentCatalog() throws TException { .addCol("id", "int") .addCol("name", "string") .build(metaStore.getConf())); - expected.add(new TableMeta(dbName, tableNames[i], TableType.MANAGED_TABLE.name())); + TableMeta tableMeta = new TableMeta(dbName, tableNames[i], TableType.MANAGED_TABLE.name()); + tableMeta.setCatName(catName); + expected.add(tableMeta); } List types = Collections.singletonList(TableType.MANAGED_TABLE.name());