From 57e781fe0ed9b0cca8751fb107eb527f5a8445b7 Mon Sep 17 00:00:00 2001 From: Toshihiro Suzuki Date: Wed, 18 Apr 2018 14:47:04 +0900 Subject: [PATCH] HBASE-20293 get_splits returns duplicate split points when region replication is on --- hbase-shell/src/main/ruby/hbase/table.rb | 50 ++++++++++++---------- .../src/main/ruby/shell/commands/get_splits.rb | 3 +- hbase-shell/src/test/ruby/hbase/table_test.rb | 18 +++++++- hbase-shell/src/test/ruby/test_helper.rb | 11 +++++ 4 files changed, 57 insertions(+), 25 deletions(-) diff --git a/hbase-shell/src/main/ruby/hbase/table.rb b/hbase-shell/src/main/ruby/hbase/table.rb index 3e3fb8e..5959224 100644 --- a/hbase-shell/src/main/ruby/hbase/table.rb +++ b/hbase-shell/src/main/ruby/hbase/table.rb @@ -20,6 +20,8 @@ include Java java_import org.apache.hadoop.hbase.util.Bytes +java_import org.apache.hadoop.hbase.client.RegionReplicaUtil +java_import org.apache.hadoop.hbase.client.Scan # Wrapper for org.apache.hadoop.hbase.client.Table @@ -48,8 +50,9 @@ module Hbase method = name.to_sym self.class_eval do define_method method do |*args| - @shell.internal_command(shell_command, internal_method_name, self, *args) - end + @shell.internal_command(shell_command, internal_method_name, self, + *args) + end end end @@ -143,7 +146,7 @@ EOF end #Case where attributes are specified without timestamp if timestamp.kind_of?(Hash) - timestamp.each do |k, v| + timestamp.each do |k, v| if k == 'ATTRIBUTES' set_attributes(p, v) elsif k == 'VISIBILITY' @@ -185,12 +188,12 @@ EOF timestamp = org.apache.hadoop.hbase.HConstants::LATEST_TIMESTAMP end d = org.apache.hadoop.hbase.client.Delete.new(row.to_s.to_java_bytes, timestamp) - if temptimestamp.kind_of?(Hash) - temptimestamp.each do |k, v| - if v.kind_of?(String) - set_cell_visibility(d, v) if v - end - end + if temptimestamp.is_a?(Hash) + temptimestamp.each do |_, v| + if v.is_a?(String) + set_cell_visibility(d, v) if v + end + end end if args.any? visibility = args[VISIBILITY] @@ -264,7 +267,7 @@ EOF # Count rows in a table def _count_internal(interval = 1000, caching_rows = 10) # We can safely set scanner caching with the first key only filter - scan = org.apache.hadoop.hbase.client.Scan.new + scan = Scan.new scan.setCacheBlocks(false) scan.setCaching(caching_rows) scan.setFilter(org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter.new) @@ -425,6 +428,7 @@ EOF org.apache.hadoop.hbase.util.Bytes::toLong(cell.getValue) end + # rubocop:disable Metrics/MethodLength def _hash_to_scan(args) if args.any? enablemetrics = args["ALL_METRICS"].nil? ? false : args["ALL_METRICS"] @@ -453,10 +457,10 @@ EOF end scan = if stoprow - org.apache.hadoop.hbase.client.Scan.new(startrow.to_java_bytes, stoprow.to_java_bytes) - else - org.apache.hadoop.hbase.client.Scan.new(startrow.to_java_bytes) - end + Scan.new(startrow.to_java_bytes, stoprow.to_java_bytes) + else + Scan.new(startrow.to_java_bytes) + end # This will overwrite any startrow/stoprow settings scan.setRowPrefixFilter(rowprefixfilter.to_java_bytes) if rowprefixfilter @@ -493,11 +497,12 @@ EOF set_authorizations(scan, authorizations) if authorizations scan.setConsistency(org.apache.hadoop.hbase.client.Consistency.valueOf(consistency)) if consistency else - scan = org.apache.hadoop.hbase.client.Scan.new + scan = Scan.new end scan end + # rubocop:enable Metrics/MethodLength def _get_scanner(args) @table.getScanner(_hash_to_scan(args)) @@ -508,7 +513,7 @@ EOF def _scan_internal(args = {}, scan = nil) raise(ArgumentError, "Args should be a Hash") unless args.kind_of?(Hash) raise(ArgumentError, "Scan argument should be org.apache.hadoop.hbase.client.Scan") \ - unless scan == nil || scan.kind_of?(org.apache.hadoop.hbase.client.Scan) + unless scan.nil? || scan.is_a?(Scan) limit = args["LIMIT"] || -1 maxlength = args.delete("MAXLENGTH") || -1 @@ -718,13 +723,14 @@ EOF #---------------------------------------------------------------------------------------------- # Get the split points for the table - def _get_splits_internal() - locator = @table.getRegionLocator() - locator.getAllRegionLocations() - .map { |i| Bytes.toStringBinary(i.getRegionInfo().getStartKey) } - .delete_if { |k| k == "" } + def _get_splits_internal + l = @table.getRegionLocator + l.getAllRegionLocations. + select { |s| RegionReplicaUtil.isDefaultReplica(s.getRegionInfo) }. + map { |i| Bytes.toStringBinary(i.getRegionInfo.getStartKey) }. + delete_if { |k| k == '' } ensure - locator.close() + l.close end end # rubocop:enable Metrics/ClassLength diff --git a/hbase-shell/src/main/ruby/shell/commands/get_splits.rb b/hbase-shell/src/main/ruby/shell/commands/get_splits.rb index 26be15f..2a74b0f 100644 --- a/hbase-shell/src/main/ruby/shell/commands/get_splits.rb +++ b/hbase-shell/src/main/ruby/shell/commands/get_splits.rb @@ -38,8 +38,7 @@ EOF def get_splits(table) splits = table._get_splits_internal() - puts(format('Total number of splits = %d', - numsplits: (splits.size + 1))) + puts(format('Total number of splits = %d', splits.size + 1)) splits end end diff --git a/hbase-shell/src/test/ruby/hbase/table_test.rb b/hbase-shell/src/test/ruby/hbase/table_test.rb index 6ffdf89..a631fc5 100644 --- a/hbase-shell/src/test/ruby/hbase/table_test.rb +++ b/hbase-shell/src/test/ruby/hbase/table_test.rb @@ -188,6 +188,7 @@ module Hbase end # Complex data management methods tests + # rubocop:disable Metrics/ClassLength class TableComplexMethodsTest < Test::Unit::TestCase include TestHelpers @@ -302,7 +303,8 @@ module Hbase assert_not_nil(res['x:b']) end - define_test "get should work with hash columns spec and TIMESTAMP and AUTHORIZATIONS" do + define_test 'get should work with hash columns spec and TIMESTAMP and' \ + ' AUTHORIZATIONS' do res = @test_table._get_internal('1', TIMESTAMP => 1234, AUTHORIZATIONS=>['PRIVATE']) assert_nil(res) end @@ -635,5 +637,19 @@ module Hbase assert_equal(0, splits.size) assert_equal([], splits) end + + define_test 'Split count for a table with region replicas' do + @test_table_name = 'tableWithRegionReplicas' + create_test_table_with_region_replicas(@test_table_name, 3, + SPLITS => ['10']) + @table = table(@test_table_name) + splits = @table._get_splits_internal + # In this case, total splits should be 1 even if the number of region + # replicas is 3. + assert_equal(1, splits.size) + assert_equal(['10'], splits) + drop_test_table(@test_table_name) + end end + # rubocop:enable Metrics/ClassLength end diff --git a/hbase-shell/src/test/ruby/test_helper.rb b/hbase-shell/src/test/ruby/test_helper.rb index b4bec90..c947439 100644 --- a/hbase-shell/src/test/ruby/test_helper.rb +++ b/hbase-shell/src/test/ruby/test_helper.rb @@ -107,6 +107,17 @@ module Hbase end end + def create_test_table_with_region_replicas(name, num_of_replicas, splits) + # Create the table if needed + unless admin.exists?(name) + admin.create name, 'f1', { REGION_REPLICATION => num_of_replicas }, + splits + end + + # Enable the table if needed + admin.enable(name) unless admin.enabled?(name) + end + def drop_test_table(name) return unless admin.exists?(name) begin -- 2.10.1 (Apple Git-78)