From 247fc8c8f1fdde1fbdb322400ff942e8fd2b59a4 Mon Sep 17 00:00:00 2001 From: Nihal Jain Date: Thu, 24 Jan 2019 23:47:57 +0530 Subject: [PATCH] HBASE-21636 Enhance the shell scan command to support missing scanner specifications like ReadType, IsolationLevel etc. --- hbase-shell/src/main/ruby/hbase/table.rb | 62 ++++++++++++------- hbase-shell/src/main/ruby/hbase_constants.rb | 5 ++ .../src/main/ruby/shell/commands/scan.rb | 11 +++- hbase-shell/src/test/ruby/hbase/table_test.rb | 24 +++++++ 4 files changed, 78 insertions(+), 24 deletions(-) diff --git a/hbase-shell/src/main/ruby/hbase/table.rb b/hbase-shell/src/main/ruby/hbase/table.rb index 7a334a0814..3515b175c6 100644 --- a/hbase-shell/src/main/ruby/hbase/table.rb +++ b/hbase-shell/src/main/ruby/hbase/table.rb @@ -59,8 +59,8 @@ module Hbase <<-EOF Help for table-reference commands. -You can either create a table via 'create' and then manipulate the table via commands like 'put', 'get', etc. -See the standard help information for how to use each of these commands. +You can either create a table via 'create' and then manipulate the table via commands like +'put', 'get', etc. See the standard help information for how to use each of these commands. However, as of 0.96, you can also get a reference to a table, on which you can invoke commands. For instance, you can get create a table and keep around a reference to it via: @@ -433,7 +433,8 @@ EOF get.setFilter(filter) end - get.setConsistency(org.apache.hadoop.hbase.client.Consistency.valueOf(consistency)) if consistency + get.setConsistency( + org.apache.hadoop.hbase.client.Consistency.valueOf(consistency)) if consistency get.setReplicaId(replicaId) if replicaId # Call hbase for the results @@ -448,9 +449,9 @@ EOF res = {} result.listCells.each do |c| family = convert_bytes_with_position(c.getFamilyArray, - c.getFamilyOffset, c.getFamilyLength, converter_class, converter) + c.getFamilyOffset, c.getFamilyLength, converter_class, converter) qualifier = convert_bytes_with_position(c.getQualifierArray, - c.getQualifierOffset, c.getQualifierLength, converter_class, converter) + c.getQualifierOffset, c.getQualifierLength, converter_class, converter) column = "#{family}:#{qualifier}" value = to_string(column, c, maxlength, converter_class, converter) @@ -508,12 +509,21 @@ EOF # Normalize column names columns = [columns] if columns.class == String limit = args['LIMIT'] || -1 + replica_id = args[REGION_REPLICA_ID] + isolation_level = args[ISOLATION_LEVEL] + read_type = args[READ_TYPE] + allow_partial_results = + args[ALLOW_PARTIAL_RESULTS].nil? ? false: args[ALLOW_PARTIAL_RESULTS] + batch = args[BATCH] || -1 + max_result_size = args[MAX_RESULT_SIZE] || -1 + unless columns.is_a?(Array) raise ArgumentError, 'COLUMNS must be specified as a String or an Array' end scan = if stoprow - org.apache.hadoop.hbase.client.Scan.new(startrow.to_java_bytes, stoprow.to_java_bytes) + org.apache.hadoop.hbase.client.Scan.new(startrow.to_java_bytes, + stoprow.to_java_bytes) else org.apache.hadoop.hbase.client.Scan.new(startrow.to_java_bytes) end @@ -549,10 +559,19 @@ EOF scan.setMaxVersions(versions) if versions > 1 scan.setTimeRange(timerange[0], timerange[1]) if timerange scan.setRaw(raw) - scan.setCaching(limit) if limit > 0 + scan.setLimit(limit) if limit > 0 set_attributes(scan, attributes) if attributes set_authorizations(scan, authorizations) if authorizations - scan.setConsistency(org.apache.hadoop.hbase.client.Consistency.valueOf(consistency)) if consistency + scan.setConsistency( + org.apache.hadoop.hbase.client.Consistency.valueOf(consistency)) if consistency + scan.setReplicaId(replica_id) if replica_id + scan.setIsolationLevel( + org.apache.hadoop.hbase.client.IsolationLevel.valueOf(isolation_level)) if isolation_level + scan.setReadType( + org.apache.hadoop.hbase.client::Scan::ReadType.valueOf(read_type)) if read_type + scan.setAllowPartialResults(allow_partial_results) if allow_partial_results + scan.setBatch(batch) if batch > 0 + scan.setMaxResultSize(max_result_size) if max_result_size > 0 else scan = org.apache.hadoop.hbase.client.Scan.new end @@ -571,7 +590,6 @@ EOF raise(ArgumentError, 'Scan argument should be org.apache.hadoop.hbase.client.Scan') \ unless scan.nil? || scan.is_a?(org.apache.hadoop.hbase.client.Scan) - limit = args['LIMIT'] || -1 maxlength = args.delete('MAXLENGTH') || -1 converter = args.delete(FORMATTER) || nil converter_class = args.delete(FORMATTER_CLASS) || 'org.apache.hadoop.hbase.util.Bytes' @@ -591,9 +609,9 @@ EOF row.listCells.each do |c| family = convert_bytes_with_position(c.getFamilyArray, - c.getFamilyOffset, c.getFamilyLength, converter_class, converter) + c.getFamilyOffset, c.getFamilyLength, converter_class, converter) qualifier = convert_bytes_with_position(c.getQualifierArray, - c.getQualifierOffset, c.getQualifierLength, converter_class, converter) + c.getQualifierOffset, c.getQualifierLength, converter_class, converter) column = "#{family}:#{qualifier}" cell = to_string(column, c, maxlength, converter_class, converter) @@ -605,13 +623,8 @@ EOF res[key][column] = cell end end - # One more row processed count += 1 - if limit > 0 && count >= limit - # If we reached the limit, exit before the next call to hasNext - break - end end scanner.close @@ -647,7 +660,8 @@ EOF end def set_authorizations(oprattr, authorizations) - raise(ArgumentError, 'Authorizations must be a Array type') unless authorizations.is_a?(Array) + raise(ArgumentError, + 'Authorizations must be a Array type') unless authorizations.is_a?(Array) auths = [authorizations].flatten.compact oprattr.setAuthorizations( org.apache.hadoop.hbase.security.visibility.Authorizations.new( @@ -733,30 +747,31 @@ EOF if is_meta_table? if column == 'info:regioninfo' || column == 'info:splitA' || column == 'info:splitB' hri = org.apache.hadoop.hbase.HRegionInfo.parseFromOrNull(kv.getValueArray, - kv.getValueOffset, kv.getValueLength) + kv.getValueOffset, kv.getValueLength) return format('timestamp=%d, value=%s', kv.getTimestamp, hri.toString) end if column == 'info:serverstartcode' if kv.getValueLength > 0 str_val = org.apache.hadoop.hbase.util.Bytes.toLong(kv.getValueArray, - kv.getValueOffset, kv.getValueLength) + kv.getValueOffset, kv.getValueLength) else str_val = org.apache.hadoop.hbase.util.Bytes.toStringBinary(kv.getValueArray, - kv.getValueOffset, kv.getValueLength) + kv.getValueOffset, kv.getValueLength) end return format('timestamp=%d, value=%s', kv.getTimestamp, str_val) end end if org.apache.hadoop.hbase.CellUtil.isDelete(kv) - val = "timestamp=#{kv.getTimestamp}, type=#{org.apache.hadoop.hbase.KeyValue::Type.codeToType(kv.getTypeByte)}" + val ="timestamp=#{kv.getTimestamp}, type=#{org.apache.hadoop.hbase.KeyValue::Type.codeToType(kv.getTypeByte)}" else val = "timestamp=#{kv.getTimestamp}, value=#{convert(column, kv, converter_class, converter)}" end maxlength != -1 ? val[0, maxlength] : val end - def convert(column, kv, converter_class = 'org.apache.hadoop.hbase.util.Bytes', converter = 'toStringBinary') + def convert(column, kv, converter_class = 'org.apache.hadoop.hbase.util.Bytes', + converter = 'toStringBinary') # use org.apache.hadoop.hbase.util.Bytes as the default class converter_class = 'org.apache.hadoop.hbase.util.Bytes' unless converter_class # use org.apache.hadoop.hbase.util.Bytes::toStringBinary as the default convertor @@ -792,7 +807,8 @@ EOF end # if the column spec contains CONVERTER information, to get rid of :CONVERTER info from column pair. - # 1. return back normal column pair as usual, i.e., "cf:qualifier[:CONVERTER]" to "cf" and "qualifier" only + # 1. return back normal column pair as usual, i.e., "cf:qualifier[:CONVERTER]" to "cf" and + # "qualifier" only # 2. register the CONVERTER information based on column spec - "cf:qualifier" def set_converter(column) family = String.from_java_bytes(column[0]) diff --git a/hbase-shell/src/main/ruby/hbase_constants.rb b/hbase-shell/src/main/ruby/hbase_constants.rb index 554e7387c3..665536d3d6 100644 --- a/hbase-shell/src/main/ruby/hbase_constants.rb +++ b/hbase-shell/src/main/ruby/hbase_constants.rb @@ -65,6 +65,11 @@ module HBaseConstants NUMREGIONS = 'NUMREGIONS'.freeze REGION_REPLICATION = 'REGION_REPLICATION'.freeze REGION_REPLICA_ID = 'REGION_REPLICA_ID'.freeze + ISOLATION_LEVEL = 'ISOLATION_LEVEL'.freeze + READ_TYPE = 'READ_TYPE'.freeze + ALLOW_PARTIAL_RESULTS = 'ALLOW_PARTIAL_RESULTS'.freeze + BATCH = 'BATCH'.freeze + MAX_RESULT_SIZE = 'MAX_RESULT_SIZE'.freeze CONFIGURATION = org.apache.hadoop.hbase.HConstants::CONFIGURATION ATTRIBUTES = 'ATTRIBUTES'.freeze VISIBILITY = 'VISIBILITY'.freeze diff --git a/hbase-shell/src/main/ruby/shell/commands/scan.rb b/hbase-shell/src/main/ruby/shell/commands/scan.rb index 96382f3195..2dc377e402 100644 --- a/hbase-shell/src/main/ruby/shell/commands/scan.rb +++ b/hbase-shell/src/main/ruby/shell/commands/scan.rb @@ -25,7 +25,9 @@ module Shell Scan a table; pass table name and optionally a dictionary of scanner specifications. Scanner specifications may include one or more of: TIMERANGE, FILTER, LIMIT, STARTROW, STOPROW, ROWPREFIXFILTER, TIMESTAMP, -MAXLENGTH or COLUMNS, CACHE or RAW, VERSIONS, ALL_METRICS or METRICS +MAXLENGTH, COLUMNS, CACHE, RAW, VERSIONS, ALL_METRICS, METRICS, +REGION_REPLICA_ID, ISOLATION_LEVEL, READ_TYPE, ALLOW_PARTIAL_RESULTS, +BATCH or MAX_RESULT_SIZE If no columns are specified, all columns will be scanned. To scan all members of a column family, leave the qualifier empty as in @@ -56,6 +58,8 @@ Some examples: hbase> scan 't1', {FILTER => org.apache.hadoop.hbase.filter.ColumnPaginationFilter.new(1, 0)} hbase> scan 't1', {CONSISTENCY => 'TIMELINE'} + hbase> scan 't1', {ISOLATION_LEVEL => 'READ_UNCOMMITTED'} + hbase> scan 't1', {MAX_RESULT_SIZE => 123456} For setting the Operation Attributes hbase> scan 't1', { COLUMNS => ['c1', 'c2'], ATTRIBUTES => {'mykey' => 'myvalue'}} hbase> scan 't1', { COLUMNS => ['c1', 'c2'], AUTHORIZATIONS => ['PRIVATE','SECRET']} @@ -72,6 +76,11 @@ Disabled by default. Example: hbase> scan 't1', {RAW => true, VERSIONS => 10} +There is yet another option -- READ_TYPE -- which instructs the scanner to +use a specific read type. Example: + + hbase> scan 't1', {READ_TYPE => 'PREAD'} + Besides the default 'toStringBinary' format, 'scan' supports custom formatting by column. A user can define a FORMATTER by adding it to the column name in the scan specification. The FORMATTER can be stipulated: diff --git a/hbase-shell/src/test/ruby/hbase/table_test.rb b/hbase-shell/src/test/ruby/hbase/table_test.rb index e2645d805a..2ac03e50a7 100644 --- a/hbase-shell/src/test/ruby/hbase/table_test.rb +++ b/hbase-shell/src/test/ruby/hbase/table_test.rb @@ -602,6 +602,30 @@ module Hbase assert_nil(res['2']['x:b']) end + define_test 'scan should support REGION_REPLICA_ID' do + res = @test_table._scan_internal REGION_REPLICA_ID => 0 + assert_not_nil(res) + end + + define_test 'scan should support ISOLATION_LEVEL' do + res = @test_table._scan_internal ISOLATION_LEVEL => 'READ_COMMITTED' + assert_not_nil(res) + end + + define_test 'scan should support READ_TYPE parameter' do + res = @test_table._scan_internal READ_TYPE => 'PREAD' + assert_not_nil(res) + res = @test_table._scan_internal READ_TYPE => 'STREAM' + assert_not_nil(res) + res = @test_table._scan_internal READ_TYPE => 'DEFAULT' + assert_not_nil(res) + end + + define_test 'scan should support ALLOW_PARTIAL_RESULTS' do + res = @test_table._scan_internal ALLOW_PARTIAL_RESULTS => true + assert_not_nil(res) + end + define_test "scan should work with raw and version parameter" do # Create test table if it does not exist @test_name_raw = "hbase_shell_tests_raw_scan" -- 2.19.1