From f43db87f9a95177d48786637a317b07a4640cda9 Mon Sep 17 00:00:00 2001 From: Balazs Meszaros Date: Tue, 25 Sep 2018 15:28:37 -0700 Subject: [PATCH] HBASE-21231 Add documentation for MajorCompactor This patch also adds 'majorcompact' to bin/hbase. --- bin/hbase | 3 ++ bin/hbase.cmd | 7 +++- src/main/asciidoc/_chapters/ops_mgt.adoc | 51 ++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/bin/hbase b/bin/hbase index 559a02e9f8..2e2ccd7c99 100755 --- a/bin/hbase +++ b/bin/hbase @@ -97,6 +97,7 @@ if [ $# = 0 ]; then if [ "${in_omnibus_tarball}" = "true" ]; then echo " wal Write-ahead-log analyzer" echo " hfile Store file analyzer" + echo " majorcompact Runs major compaction on a table" echo " zkcli Run the ZooKeeper shell" echo " master Run an HBase HMaster node" echo " regionserver Run an HBase HRegionServer node" @@ -487,6 +488,8 @@ elif [ "$COMMAND" = "wal" ] ; then CLASS='org.apache.hadoop.hbase.wal.WALPrettyPrinter' elif [ "$COMMAND" = "hfile" ] ; then CLASS='org.apache.hadoop.hbase.io.hfile.HFilePrettyPrinter' +elif [ "$COMMAND" = "majorcompact" ] ; then + CLASS='org.apache.hadoop.hbase.util.compaction.MajorCompactor' elif [ "$COMMAND" = "zkcli" ] ; then CLASS="org.apache.hadoop.hbase.zookeeper.ZKMainServer" for f in $HBASE_HOME/lib/zkcli/*.jar; do diff --git a/bin/hbase.cmd b/bin/hbase.cmd index fbeb1f8290..def374e81c 100644 --- a/bin/hbase.cmd +++ b/bin/hbase.cmd @@ -211,7 +211,7 @@ goto :MakeCmdArgsLoop set hbase-command-arguments=%_hbasearguments% @rem figure out which class to run -set corecommands=shell master regionserver thrift thrift2 rest avro hlog wal hbck hfile zookeeper zkcli mapredcp +set corecommands=shell master regionserver thrift thrift2 rest avro hlog wal hbck hfile majorcompact zookeeper zkcli mapredcp for %%i in ( %corecommands% ) do ( if "%hbase-command%"=="%%i" set corecommand=true ) @@ -423,6 +423,10 @@ goto :eof set CLASS=org.apache.hadoop.hbase.io.hfile.HFile goto :eof +:majorcompact + set 
CLASS=org.apache.hadoop.hbase.util.compaction.MajorCompactor + goto :eof + :zkcli set CLASS=org.apache.hadoop.hbase.zookeeper.ZKMainServer set CLASSPATH=!CLASSPATH!;%HBASE_HOME%\lib\zkcli\* @@ -455,6 +459,7 @@ goto :eof echo hbck Run the hbase 'fsck' tool echo wal Write-ahead-log analyzer echo hfile Store file analyzer + echo majorcompact Runs major compaction on a table echo zkcli Run the ZooKeeper shell echo master Run an HBase HMaster node echo regionserver Run an HBase HRegionServer node diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc index 443d895493..709bd7338b 100644 --- a/src/main/asciidoc/_chapters/ops_mgt.adoc +++ b/src/main/asciidoc/_chapters/ops_mgt.adoc @@ -55,6 +55,7 @@ Some commands take arguments. Pass no args or -h for usage. snapshot Tool for managing snapshots wal Write-ahead-log analyzer hfile Store file analyzer + majorcompact Runs major compaction on a table zkcli Run the ZooKeeper shell master Run an HBase HMaster node regionserver Run an HBase HRegionServer node @@ -424,6 +425,56 @@ In those versions, you can print the contents of a WAL using the same configurat ---- ==== +[[majorcompact.tool]] +=== Compaction tool + +It runs a major compaction on the specified table. It is a standalone tool, so compactions +can be started and monitored without HBase shell. It has some advantages over the +`major_compact` shell command: + +* It can be scheduled by cron for example. +* It is synchronous, so it won't exit until the compaction finishes. +* The I/O bandwidth of the cluster can be limited. 
+ +Usage: + +---- +$ bin/hbase majorcompact +usage: MajorCompactor [-cf <arg>] [-dryRun] [-minModTime <arg>] [-retries + <arg>] [-rootDir <arg>] -servers <arg> [-sleep <arg>] -table <arg> + [-zk <arg>] + +Usage instructions + + -cf <arg> column families: comma separated eg: a,b,c + -dryRun Dry run, will just output a list of regions that + require compaction based on parameters passed + -minModTime <arg> Compact if store files have modification time < minModTime + -retries <arg> Max # of retries for a compaction request, defaults to 3 + -rootDir <arg> hbase.rootDir + -servers <arg> Concurrent servers compacting + -sleep <arg> Time to sleepForMs (ms) for checking compaction + status per region and available work queues: default 30s + -table <arg> table name + -zk <arg> zk quorum +---- + +The tool compacts every region of a table. The table must be specified by the `-table` +parameter. All column families are compacted by default, but they can be explicitly +defined by the `-cf` switch. The other required parameter is `-servers`, which +limits the number of concurrent compactions (how many region servers can run the +compaction simultaneously). HFiles that are too young can be excluded from compaction with +the `-minModTime` switch (the parameter is in Unix time in milliseconds). + +For example: + +---- +$ bin/hbase majorcompact -servers 4 -table testtable +---- + +It will run major compaction on `testtable` and no more than `4` region servers +will compact at the same time. + [[compression.tool]] === Compression Tool -- 2.18.0