diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
index 37228b0..35114c6 100644
--- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
+++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/common/HCatConstants.java
@@ -96,6 +96,19 @@ private HCatConstants() { // restrict instantiation
 
   public static final String HCAT_DESIRED_PARTITION_NUM_SPLITS = "hcat.desired.partition.num.splits";
 
+  /**
+   * hcat.append.limit allows an HCat user to specify a custom append limit.
+   * By default, when appending to an existing directory, HCat attempts to
+   * avoid naming clashes by appending _a_NNN, where NNN is a number, to the
+   * desired filename. However, by default, it only tries NNN from 0 to 999
+   * before giving up. This can cause an issue for some tables with an
+   * extraordinarily large number of files. Ideally, this should be fixed by
+   * the user changing their usage pattern and doing some manner of
+   * compaction, but in the meantime, until they can, setting this parameter
+   * can be used to raise that limit.
+   */
+  public static final String HCAT_APPEND_LIMIT = "hcat.append.limit";
+
   // IMPORTANT IMPORTANT IMPORTANT!!!!!
   //The keys used to store info into the job Configuration.
   //If any new keys are added, the HCatStorer needs to be updated. The HCatStorer
diff --git a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
index cc90129..83dbe76 100644
--- a/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
+++ b/hcatalog/core/src/main/java/org/apache/hive/hcatalog/mapreduce/FileOutputCommitterContainer.java
@@ -75,6 +75,8 @@
   static final String DYNTEMP_DIR_NAME = "_DYN";
   static final String SCRATCH_DIR_NAME = "_SCRATCH";
   private static final String APPEND_SUFFIX = "_a_";
+  private static final int APPEND_COUNTER_WARN_THRESHOLD = 1000;
+  private final int maxAppendAttempts;
 
   private static final Logger LOG = LoggerFactory.getLogger(FileOutputCommitterContainer.class);
   private final boolean dynamicPartitioningUsed;
@@ -112,6 +114,8 @@ public FileOutputCommitterContainer(JobContext context,
     } else {
       customDynamicLocationUsed = false;
     }
+
+    this.maxAppendAttempts = context.getConfiguration().getInt(HCatConstants.HCAT_APPEND_LIMIT, APPEND_COUNTER_WARN_THRESHOLD);
   }
 
   @Override
@@ -646,19 +650,23 @@ private Path getFinalPath(FileSystem fs, Path file, Path src,
         filetype = "";
       }
 
-      // Attempt to find COUNTER_MAX possible alternatives to a filename by
+      // Attempt to find maxAppendAttempts possible alternatives to a filename by
       // appending _a_N and seeing if that destination also clashes. If we're
       // still clashing after that, give up.
-      final int COUNTER_MAX = 1000;
       int counter = 1;
-      for (; fs.exists(itemDest) && counter < COUNTER_MAX ; counter++) {
+      for (; fs.exists(itemDest) && counter < maxAppendAttempts; counter++) {
         itemDest = new Path(dest, name + (APPEND_SUFFIX + counter) + filetype);
       }
 
-      if (counter == COUNTER_MAX){
+      if (counter == maxAppendAttempts) {
         throw new HCatException(ErrorType.ERROR_MOVE_FAILED,
             "Could not find a unique destination path for move: file = "
                 + file + " , src = " + src + ", dest = " + dest);
+      } else if (counter > APPEND_COUNTER_WARN_THRESHOLD) {
+        LOG.warn("Append job used filename clash counter [" + counter
+            + "] which is greater than warning limit [" + APPEND_COUNTER_WARN_THRESHOLD
+            + "]. Please compact this table so that performance is not impacted."
+            + " Please see HIVE-9381 for details.");
       }
     }
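
For reference, a minimal sketch of how a job could raise the new limit. This is not part of the patch: the class name AppendLimitExample, the job name, and the value 5000 are illustrative, and the snippet assumes a standard MapReduce driver, showing only the configuration step.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.common.HCatConstants;

// Hypothetical driver illustrating how hcat.append.limit could be set on a job.
public class AppendLimitExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Allow up to 5000 _a_NNN append attempts instead of the default 1000 (illustrative value).
    // A warning is still logged once the clash counter passes APPEND_COUNTER_WARN_THRESHOLD.
    conf.setInt(HCatConstants.HCAT_APPEND_LIMIT, 5000);
    Job job = Job.getInstance(conf, "hcat-append-example");
    // ... configure HCatOutputFormat and the rest of the job as usual ...
  }
}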