Index: conf/hbase-default.xml
===================================================================
--- conf/hbase-default.xml	(revision 688978)
+++ conf/hbase-default.xml	(working copy)
@@ -245,6 +245,18 @@
+  <property>
+    <name>hbase.hstore.compaction.max</name>
+    <value>10</value>
+    <description>Max number of HStoreFiles to compact at one time while doing a minor compaction.
+    </description>
+  </property>
+  <property>
+    <name>hbase.hregion.majorcompaction</name>
+    <value>86400000</value>
+    <description>The time (in milliseconds) between major compactions of all HStoreFiles into one. Default: 1 day.
+    </description>
+  </property>
   <property>
     <name>hbase.regionserver.thread.splitcompactcheckfrequency</name>
     <value>20000</value>
     <description>How often a region server runs the split/compaction check.
     </description>
   </property>
Index: src/java/org/apache/hadoop/hbase/regionserver/HStore.java
===================================================================
--- src/java/org/apache/hadoop/hbase/regionserver/HStore.java	(revision 688978)
+++ src/java/org/apache/hadoop/hbase/regionserver/HStore.java	(working copy)
@@ -86,7 +86,8 @@
   final FileSystem fs;
   private final HBaseConfiguration conf;
   protected long ttl;
-
+  private long majorCompactionTime;
+  private int maxFilesToCompact;
   private final long desiredMaxFileSize;
   private volatile long storeSize;
 
@@ -187,6 +188,8 @@
     }
     this.desiredMaxFileSize = maxFileSize;
+    this.majorCompactionTime = conf.getLong("hbase.hregion.majorcompaction", 86400000);
+    this.maxFilesToCompact = conf.getInt("hbase.hstore.compaction.max", 10);
 
     this.storeSize = 0L;
 
     if (family.getCompression() == HColumnDescriptor.CompressionType.BLOCK) {
@@ -708,7 +711,29 @@
     }
     return false;
   }
-
+
+  /*
+   * Gets the lowest (oldest) modification timestamp among the files in a dir,
+   * or 0 if the dir is empty.
+   * @param fs filesystem to use
+   * @param dir directory whose files are inspected
+   * @throws IOException
+   */
+  private static long getLowestTimestamp(FileSystem fs, Path dir) throws IOException {
+    FileStatus[] stats = fs.listStatus(dir);
+    if (stats == null || stats.length == 0) {
+      return 0L;
+    }
+    long lowTimestamp = Long.MAX_VALUE;
+    for (int i = 0; i < stats.length; i++) {
+      long timestamp = stats[i].getModificationTime();
+      if (timestamp < lowTimestamp) {
+        lowTimestamp = timestamp;
+      }
+    }
+    return lowTimestamp;
+  }
+
   /**
    * Compact the back-HStores.  This method may take some time, so the calling
    * thread must be able to block for long periods.
@@ -730,7 +755,7 @@
    * @return mid key if a split is needed, null otherwise
    * @throws IOException
    */
-  StoreSize compact(final boolean force) throws IOException {
+  StoreSize compact(boolean force) throws IOException {
     synchronized (compactLock) {
       long maxId = -1;
       int nrows = -1;
@@ -740,11 +765,27 @@
         return null;
       }
       filesToCompact = new ArrayList(this.storefiles.values());
-
+
       // The max-sequenceID in any of the to-be-compacted TreeMaps is the
       // last key of storefiles.
      maxId = this.storefiles.lastKey().longValue();
       }
+      // Check whether a major compaction is due on this region; if so, set
+      // force to true so the incremental file selection below is skipped.
+      // Only checked when force is false; a forced compaction is major anyway.
+      if (!force) {
+        Path mapdir = HStoreFile.getMapDir(basedir, info.getEncodedName(), family.getName());
+        long lowTimestamp = getLowestTimestamp(fs, mapdir);
+        if (LOG.isDebugEnabled() && lowTimestamp > 0L) {
+          LOG.debug("Hours since last major compaction: " + (System.currentTimeMillis() - lowTimestamp) / 3600000);
+        }
+        if (lowTimestamp < (System.currentTimeMillis() - majorCompactionTime) && lowTimestamp > 0L) {
+          if (LOG.isDebugEnabled()) {
+            LOG.debug("Major compaction triggered on store: " + this.storeNameStr);
+          }
+          force = true;
+        }
+      }
       if (!force && !hasReferences(filesToCompact) && filesToCompact.size() < compactionThreshold) {
         return checkSplit();
       }
@@ -777,7 +818,7 @@
       // size of the second, skip the largest, and continue to next...,
       // until we meet the compactionThreshold limit.
       for (point = 0; point < compactionThreshold - 1; point++) {
-        if (fileSizes[point] < fileSizes[point + 1] * 2) {
+        if (fileSizes[point] < fileSizes[point + 1] * 2 && (countOfFiles - point) <= maxFilesToCompact) {
           break;
         }
         skipped += fileSizes[point];
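
Below is a minimal standalone sketch, separate from the patch itself, of the two decisions the patch adds to HStore.compact(): the age-based major-compaction trigger driven by hbase.hregion.majorcompaction, and the minor-compaction file selection capped by hbase.hstore.compaction.max. The class and method names (CompactionPolicySketch, isMajorCompactionDue, selectCompactionStart) are illustrative only and are not part of HBase; the sketch works on plain longs rather than HStoreFiles and omits the reference-file and split checks.

import java.util.Arrays;

/**
 * Standalone sketch (hypothetical class, not HBase code) of the two
 * compaction decisions added to HStore.compact() by this patch.
 */
public class CompactionPolicySketch {

  /**
   * A major compaction is due when the oldest store file (lowest modification
   * timestamp, as getLowestTimestamp would report) is older than
   * hbase.hregion.majorcompaction milliseconds.
   */
  static boolean isMajorCompactionDue(long lowestTimestamp, long majorCompactionTime, long now) {
    return lowestTimestamp > 0L && lowestTimestamp < (now - majorCompactionTime);
  }

  /**
   * Minor-compaction file selection: walk the files from oldest/largest to
   * newest, skipping a file while it is more than twice the size of the next
   * one, and keep skipping while more than maxFilesToCompact files would
   * remain.  Returns the index of the first file to include.  (The real code
   * only reaches this point once the store has at least compactionThreshold
   * files.)
   */
  static int selectCompactionStart(long[] fileSizes, int compactionThreshold, int maxFilesToCompact) {
    int countOfFiles = fileSizes.length;
    int point;
    for (point = 0; point < compactionThreshold - 1 && point < countOfFiles - 1; point++) {
      if (fileSizes[point] < fileSizes[point + 1] * 2
          && (countOfFiles - point) <= maxFilesToCompact) {
        break;  // remaining files are small enough, and few enough, to compact together
      }
    }
    return point;
  }

  public static void main(String[] args) {
    long dayMillis = 86400000L;             // default hbase.hregion.majorcompaction
    long now = System.currentTimeMillis();
    // Oldest file is two days old -> a major compaction is due.
    System.out.println(isMajorCompactionDue(now - 2 * dayMillis, dayMillis, now));

    long[] sizes = {400, 100, 90, 80, 20};
    int start = selectCompactionStart(sizes, 3, 10);
    // Skips the oversized 400 file and selects files 1..4.
    System.out.println("compact files " + start + ".." + (sizes.length - 1)
        + " of " + Arrays.toString(sizes));
  }
}

Running main() reports true for a store whose oldest file is two days old, and with sizes {400, 100, 90, 80, 20} it skips the oversized 400 file and selects files 1..4, mirroring the size-ratio rule and the maxFilesToCompact cap described above.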