From 5b388a2361bdde3b32c8a2bd80cf46603e7d3ffa Mon Sep 17 00:00:00 2001 From: Elliott Clark Date: Thu, 6 Jun 2013 13:56:29 -0700 Subject: [PATCH] Backport HBASE-7842 Add compaction policy that explores more storefile groups --- .../apache/hadoop/hbase/regionserver/Store.java | 218 +++++++++++++++----- 1 file changed, 169 insertions(+), 49 deletions(-) diff --git src/main/java/org/apache/hadoop/hbase/regionserver/Store.java src/main/java/org/apache/hadoop/hbase/regionserver/Store.java index a20892e..9971da8 100644 --- src/main/java/org/apache/hadoop/hbase/regionserver/Store.java +++ src/main/java/org/apache/hadoop/hbase/regionserver/Store.java @@ -1553,9 +1553,6 @@ public class Store extends SchemaConfigured implements HeapSize { if (!majorcompaction && !hasReferences(compactSelection.getFilesToCompact())) { - // we're doing a minor compaction, let's see what files are applicable - int start = 0; - double r = compactSelection.getCompactSelectionRatio(); // remove bulk import files that request to be excluded from minors compactSelection.getFilesToCompact().removeAll(Collections2.filter( @@ -1576,25 +1573,48 @@ public class Store extends SchemaConfigured implements HeapSize { compactSelection.emptyFileList(); return compactSelection; } + if (conf.getBoolean("hbase.hstore.useExploringCompation", true)) { + compactSelection = exploringCompactionSelection(compactSelection); + } else { + compactSelection = defaultCompactionSelection(compactSelection); + } + } else { + if(majorcompaction) { + if (compactSelection.getFilesToCompact().size() > this.maxFilesToCompact) { + LOG.debug("Warning, compacting more than " + this.maxFilesToCompact + + " files, probably because of a user-requested major compaction"); + if(priority != PRIORITY_USER) { + LOG.error("Compacting more than max files on a non user-requested compaction"); + } + } + } else if (compactSelection.getFilesToCompact().size() > this.maxFilesToCompact) { + // all files included in this compaction, up to max + int 
pastMax = compactSelection.getFilesToCompact().size() - this.maxFilesToCompact; + compactSelection.getFilesToCompact().subList(0, pastMax).clear(); + } + } + return compactSelection; + } - /* TODO: add sorting + unit test back in when HBASE-2856 is fixed - // Sort files by size to correct when normal skew is altered by bulk load. - Collections.sort(filesToCompact, StoreFile.Comparators.FILE_SIZE); - */ + private CompactSelection defaultCompactionSelection(CompactSelection compactSelection) { + // we're doing a minor compaction, let's see what files are applicable + int start = 0; - // get store file sizes for incremental compacting selection. - int countOfFiles = compactSelection.getFilesToCompact().size(); - long [] fileSizes = new long[countOfFiles]; - long [] sumSize = new long[countOfFiles]; - for (int i = countOfFiles-1; i >= 0; --i) { - StoreFile file = compactSelection.getFilesToCompact().get(i); - fileSizes[i] = file.getReader().length(); - // calculate the sum of fileSizes[i,i+maxFilesToCompact-1) for algo - int tooFar = i + this.maxFilesToCompact - 1; - sumSize[i] = fileSizes[i] - + ((i+1 < countOfFiles) ? sumSize[i+1] : 0) - - ((tooFar < countOfFiles) ? fileSizes[tooFar] : 0); - } + double r = compactSelection.getCompactSelectionRatio(); + + // get store file sizes for incremental compacting selection. + int countOfFiles = compactSelection.getFilesToCompact().size(); + long [] fileSizes = new long[countOfFiles]; + long [] sumSize = new long[countOfFiles]; + for (int i = countOfFiles-1; i >= 0; --i) { + StoreFile file = compactSelection.getFilesToCompact().get(i); + fileSizes[i] = file.getReader().length(); + // calculate the sum of fileSizes[i,i+maxFilesToCompact-1) for algo + int tooFar = i + this.maxFilesToCompact - 1; + sumSize[i] = fileSizes[i] + + ((i+1 < countOfFiles) ? sumSize[i+1] : 0) + - ((tooFar < countOfFiles) ? 
fileSizes[tooFar] : 0); + } /* Start at the oldest file and stop when you find the first file that * meets compaction criteria: @@ -1609,43 +1629,143 @@ public class Store extends SchemaConfigured implements HeapSize { * situation where we always compact [end-threshold,end). Then, the * last file becomes an aggregate of the previous compactions. */ - while(countOfFiles - start >= this.minFilesToCompact && - fileSizes[start] > - Math.max(minCompactSize, (long)(sumSize[start+1] * r))) { - ++start; - } - int end = Math.min(countOfFiles, start + this.maxFilesToCompact); - long totalSize = fileSizes[start] - + ((start+1 < countOfFiles) ? sumSize[start+1] : 0); - compactSelection = compactSelection.getSubList(start, end); - - // if we don't have enough files to compact, just wait - if (compactSelection.getFilesToCompact().size() < this.minFilesToCompact) { - if (LOG.isDebugEnabled()) { - LOG.debug("Skipped compaction of " + this + while(countOfFiles - start >= this.minFilesToCompact && + fileSizes[start] > + Math.max(minCompactSize, (long)(sumSize[start+1] * r))) { + ++start; + } + int end = Math.min(countOfFiles, start + this.maxFilesToCompact); + long totalSize = fileSizes[start] + + ((start+1 < countOfFiles) ? sumSize[start+1] : 0); + compactSelection = compactSelection.getSubList(start, end); + + // if we don't have enough files to compact, just wait + if (compactSelection.getFilesToCompact().size() < this.minFilesToCompact) { + if (LOG.isDebugEnabled()) { + LOG.debug("Skipped compaction of " + this + ". 
Only " + (end - start) + " file(s) of size " + StringUtils.humanReadableInt(totalSize) + " have met compaction criteria."); - } - compactSelection.emptyFileList(); - return compactSelection; } - } else { - if(majorcompaction) { - if (compactSelection.getFilesToCompact().size() > this.maxFilesToCompact) { - LOG.debug("Warning, compacting more than " + this.maxFilesToCompact + - " files, probably because of a user-requested major compaction"); - if(priority != PRIORITY_USER) { - LOG.error("Compacting more than max files on a non user-requested compaction"); - } + compactSelection.emptyFileList(); + return compactSelection; + } + return compactSelection; + } + + private CompactSelection exploringCompactionSelection(CompactSelection compactSelection) { + + List candidates = compactSelection.getFilesToCompact(); + int futureFiles = filesCompacting.isEmpty() ? 0 : 1; + boolean mayBeStuck = (candidates.size() - filesCompacting.size() + futureFiles) + >= blockingStoreFileCount; + // Start off choosing nothing. + List bestSelection = new ArrayList(0); + List smallest = new ArrayList(0); + long bestSize = 0; + long smallestSize = Long.MAX_VALUE; + double r = compactSelection.getCompactSelectionRatio(); + + // Consider every starting place. + for (int startIndex = 0; startIndex < candidates.size(); startIndex++) { + // Consider every different sub list permutation in between start and end with min files. + for (int currentEnd = startIndex + minFilesToCompact - 1; + currentEnd < candidates.size(); currentEnd++) { + List potentialMatchFiles = candidates.subList(startIndex, currentEnd + 1); + + // Sanity checks + if (potentialMatchFiles.size() < minFilesToCompact) { + continue; + } + if (potentialMatchFiles.size() > maxFilesToCompact) { + continue; + } + + // Compute the total size of files that will + // have to be read if this set of files is compacted. + long size = getCompactionSize(potentialMatchFiles); + + // Store the smallest set of files. 
This stored set of files will be used + // if it looks like the algorithm is stuck. + if (size < smallestSize) { + smallest = potentialMatchFiles; + smallestSize = size; + } + + if (size >= minCompactSize + && !filesInRatio(potentialMatchFiles, r)) { + continue; + } + + if (size > maxCompactSize) { + continue; + } + + // Keep if this gets rid of more files. Or the same number of files for less io. + if (potentialMatchFiles.size() > bestSelection.size() + || (potentialMatchFiles.size() == bestSelection.size() && size < bestSize)) { + bestSelection = potentialMatchFiles; + bestSize = size; } - } else if (compactSelection.getFilesToCompact().size() > this.maxFilesToCompact) { - // all files included in this compaction, up to max - int pastMax = compactSelection.getFilesToCompact().size() - this.maxFilesToCompact; - compactSelection.getFilesToCompact().subList(0, pastMax).clear(); - } } + + if (bestSelection.size() == 0 && mayBeStuck) { + smallest = new ArrayList(smallest); + compactSelection.getFilesToCompact().clear(); + compactSelection.getFilesToCompact().addAll(smallest); + } else { + bestSelection = new ArrayList(bestSelection); + compactSelection.getFilesToCompact().clear(); + compactSelection.getFilesToCompact().addAll(bestSelection); + } + return compactSelection; + + } + + /** + * Check that all files satisfy the ratio + * + * @param files set of files to examine. + * @param currentRatio The ratio + * @return if all files are in ratio. 
+ */ + private boolean filesInRatio(final List files, final double currentRatio) { + if (files.size() < 2) { + return true; + } + long totalFileSize = 0; + for (int i = 0; i < files.size(); i++) { + totalFileSize += files.get(i).getReader().length(); + } + for (int i = 0; i < files.size(); i++) { + long singleFileSize = files.get(i).getReader().length(); + long sumAllOtherFilesize = totalFileSize - singleFileSize; + + if ((singleFileSize > sumAllOtherFilesize * currentRatio) + && (sumAllOtherFilesize >= this.minCompactSize)) { + return false; + } + } + return true; + } + + /** + * Get the number of bytes a proposed compaction would have to read. + * + * @param files Set of files in a proposed compaction. + * @return size in bytes. + */ + private long getCompactionSize(final List files) { + long size = 0; + if (files == null) { + return size; + } + for (StoreFile f : files) { + size += f.getReader().length(); + } + return size; } /** -- 1.7.10.2 (Apple Git-33)