Shell 'count': add KEYONLY, COLUMN, STARTROW and STOPROW options.

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Indentation and blank context lines were restored from the hunk line counts
and may not match the target files byte-for-byte; if a hunk is rejected, try
`git apply --ignore-whitespace`. The table.rb hunk was edited by hand (the
Ruby 1.8-only trailing colon after the `if` condition was removed, a
duplicated comment was dropped, else/if became elsif, parameter comments were
added), so the blob ids on that file's "index" line were not recomputed.

diff --git a/hbase-server/src/main/ruby/hbase.rb b/hbase-server/src/main/ruby/hbase.rb
index 87512bf..835ada3 100644
--- a/hbase-server/src/main/ruby/hbase.rb
+++ b/hbase-server/src/main/ruby/hbase.rb
@@ -32,6 +32,7 @@ include_class('java.lang.Long') {|package,name| "J#{name}" }
 include_class('java.lang.Boolean') {|package,name| "J#{name}" }
 
 module HBaseConstants
+  KEYONLY = "KEYONLY"
   COLUMN = "COLUMN"
   COLUMNS = "COLUMNS"
   TIMESTAMP = "TIMESTAMP"
diff --git a/hbase-server/src/main/ruby/hbase/table.rb b/hbase-server/src/main/ruby/hbase/table.rb
index 2acd94e..28640e1 100644
--- a/hbase-server/src/main/ruby/hbase/table.rb
+++ b/hbase-server/src/main/ruby/hbase/table.rb
@@ -169,12 +169,40 @@ EOF
 
     #----------------------------------------------------------------------------------------------
     # Count rows in a table
-    def _count_internal(interval = 1000, caching_rows = 10)
+    #   key_only  - 'yes' adds KeyOnlyFilter on top of FirstKeyOnlyFilter; column is then ignored
+    #   column    - 'family' or 'family:qualifier'; restricts the scan to that family or column
+    #   start_row - row key the scan starts at (inclusive); nil leaves it unset
+    #   stop_row  - row key the scan stops at (exclusive); nil leaves it unset
+    def _count_internal(interval = 1000, caching_rows = 10, key_only = nil, column = nil, start_row = nil, stop_row = nil)
       # We can safely set scanner caching with the first key only filter
       scan = org.apache.hadoop.hbase.client.Scan.new
       scan.cache_blocks = false
       scan.caching = caching_rows
-      scan.setFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new)
+
+      # By default, only use the first key only filter.
+      # If KEYONLY is set to yes, the key only filter will also be applied,
+      # and meanwhile ignore the COLUMN parameter.
+      filters = org.apache.hadoop.hbase.filter.FilterList.new
+      filters.addFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new)
+      if key_only == 'yes'
+        filters.addFilter(org.apache.hadoop.hbase.filter.KeyOnlyFilter.new)
+      elsif column
+        family, qualifier = parse_column_name(column)
+        if qualifier
+          scan.addColumn(family, qualifier)
+        else
+          scan.addFamily(family)
+        end
+      end
+      scan.setFilter(filters)
+
+      # Set the start and stop row if specified
+      if start_row
+        scan.setStartRow(start_row.to_java_bytes)
+      end
+      if stop_row
+        scan.setStopRow(stop_row.to_java_bytes)
+      end
 
       # Run the scanner
       scanner = @table.getScanner(scan)
diff --git a/hbase-server/src/main/ruby/shell/commands/count.rb b/hbase-server/src/main/ruby/shell/commands/count.rb
index e88c445..f9a03f4 100644
--- a/hbase-server/src/main/ruby/shell/commands/count.rb
+++ b/hbase-server/src/main/ruby/shell/commands/count.rb
@@ -35,6 +35,21 @@ parameter. Examples:
  hbase> count 't1', CACHE => 1000
  hbase> count 't1', INTERVAL => 10, CACHE => 1000
 
+By default, the first key/value pair of each row will be returned to count.
+If you only want to count the row id to get a faster counting, you can set
+the KEYONLY option to 'yes':
+ hbase> count 't1', KEYONLY => 'yes'
+
+You may also ask the command to only count on one column:
+ hbase> count 't1', COLUMN => 'f1:a'
+ hbase> count 't1', COLUMN => 'f1'
+
+The command supports range counting, you can do this through specifying the start
+row (inclusive) and stop row(exclusive):
+ hbase> count 't1', STARTROW => 'row-2', STOPROW => 'row-100'
+ hbase> count 't1', STARTROW => 'row-2'
+ hbase> count 't1', STOPROW => 'row-100'
+
 The same commands also can be run on a table reference. 
 Suppose you had a reference t to table 't1', the corresponding commands would be:
 
@@ -56,13 +71,17 @@ EOF
         # Merge params with defaults
         params = {
           'INTERVAL' => 1000,
-          'CACHE' => 10
+          'CACHE' => 10,
+          'KEYONLY' => nil,
+          'COLUMN' => nil,
+          'STARTROW' => nil,
+          'STOPROW' => nil
         }.merge(params)
 
         # Call the counter method
         now = Time.now
         formatter.header
-        count = table._count_internal(params['INTERVAL'].to_i, params['CACHE'].to_i) do |cnt, row|
+        count = table._count_internal(params['INTERVAL'].to_i, params['CACHE'].to_i, params['KEYONLY'], params['COLUMN'], params['STARTROW'], params['STOPROW']) do |cnt, row|
           formatter.row([ "Current count: #{cnt}, row: #{row}" ])
         end
         formatter.footer(now, count)