From 5d1b9c42151525403763c8823cda7c5c83f82ec7 Mon Sep 17 00:00:00 2001 From: Guangxu Cheng Date: Tue, 16 May 2017 00:41:51 +0800 Subject: [PATCH] HBASE-18001 Extend the "count" shell command to support specified conditions --- hbase-shell/src/main/ruby/hbase/table.rb | 26 +++++++++++++++++---- hbase-shell/src/main/ruby/shell/commands/count.rb | 9 ++++++- 2 files changed, 29 insertions(+), 6 deletions(-) diff --git a/hbase-shell/src/main/ruby/hbase/table.rb b/hbase-shell/src/main/ruby/hbase/table.rb index 22bbcfe..661118f 100644 --- a/hbase-shell/src/main/ruby/hbase/table.rb +++ b/hbase-shell/src/main/ruby/hbase/table.rb @@ -291,12 +291,28 @@ EOF #---------------------------------------------------------------------------------------------- # Count rows in a table - def _count_internal(interval = 1000, caching_rows = 10) + def _count_internal(interval = 1000, scan = nil) + + raise(ArgumentError, "Scan argument should be org.apache.hadoop.hbase.client.Scan") \ + unless scan == nil || scan.kind_of?(org.apache.hadoop.hbase.client.Scan) # We can safely set scanner caching with the first key only filter - scan = org.apache.hadoop.hbase.client.Scan.new - scan.setCacheBlocks(false) - scan.setCaching(caching_rows) - scan.setFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new) + + if scan == nil + scan = org.apache.hadoop.hbase.client.Scan.new + scan.setCacheBlocks(false) + scan.setCaching(10) + scan.setFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new) + else + scan.setCacheBlocks(false) + filter = scan.getFilter() + firstKeyOnlyFilter = org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new + if filter == nil + scan.setFilter(firstKeyOnlyFilter) + else + firstKeyOnlyFilter.setReversed(filter.isReversed()) + scan.setFilter(org.apache.hadoop.hbase.filter.FilterList.new(filter, firstKeyOnlyFilter)) + end + end # Run the scanner scanner = @table.getScanner(scan) diff --git a/hbase-shell/src/main/ruby/shell/commands/count.rb b/hbase-shell/src/main/ruby/shell/commands/count.rb index 36250a6..2f2562b 100644 --- a/hbase-shell/src/main/ruby/shell/commands/count.rb +++ b/hbase-shell/src/main/ruby/shell/commands/count.rb @@ -35,6 +35,9 @@ parameter. Examples: hbase> count 't1', INTERVAL => 100000 hbase> count 't1', CACHE => 1000 hbase> count 't1', INTERVAL => 10, CACHE => 1000 + hbase> count 't1', FILTER => " + (QualifierFilter (>=, 'binary:xyz')) AND (TimestampsFilter ( 123, 456))" + hbase> count 't1', COLUMNS => ['c1', 'c2'], STARTROW => 'abc', STOPROW => 'xyz' The same commands also can be run on a table reference. Suppose you had a reference t to table 't1', the corresponding commands would be: @@ -43,6 +46,9 @@ t to table 't1', the corresponding commands would be: hbase> t.count INTERVAL => 100000 hbase> t.count CACHE => 1000 hbase> t.count INTERVAL => 10, CACHE => 1000 + hbase> t.count FILTER => " + (QualifierFilter (>=, 'binary:xyz')) AND (TimestampsFilter ( 123, 456))" + hbase> t.count COLUMNS => ['c1', 'c2'], STARTROW => 'abc', STOPROW => 'xyz' EOF end @@ -60,10 +66,11 @@ EOF 'CACHE' => 10 }.merge(params) + scan = table._hash_to_scan(params) # Call the counter method @start_time = Time.now formatter.header - count = table._count_internal(params['INTERVAL'].to_i, params['CACHE'].to_i) do |cnt, row| + count = table._count_internal(params['INTERVAL'].to_i, scan) do |cnt, row| formatter.row([ "Current count: #{cnt}, row: #{row}" ]) end formatter.footer(count) -- 1.7.1