From 852482189ae0c5f0fe882dcc26d2d574a39251d2 Mon Sep 17 00:00:00 2001 From: Josh Elser Date: Wed, 17 May 2017 19:19:23 -0400 Subject: [PATCH] HBASE-18067 Allow default FORMATTER for shell put/get commands --- hbase-shell/src/main/ruby/hbase/table.rb | 51 ++++--- hbase-shell/src/main/ruby/hbase_constants.rb | 2 + hbase-shell/src/main/ruby/shell/commands/get.rb | 9 +- hbase-shell/src/main/ruby/shell/commands/scan.rb | 9 +- hbase-shell/src/test/ruby/shell/converter_test.rb | 157 ++++++++++++++++++++++ 5 files changed, 206 insertions(+), 22 deletions(-) create mode 100644 hbase-shell/src/test/ruby/shell/converter_test.rb diff --git a/hbase-shell/src/main/ruby/hbase/table.rb b/hbase-shell/src/main/ruby/hbase/table.rb index 22bbcfe211..d92915835e 100644 --- a/hbase-shell/src/main/ruby/hbase/table.rb +++ b/hbase-shell/src/main/ruby/hbase/table.rb @@ -347,6 +347,8 @@ EOF authorizations = args[AUTHORIZATIONS] consistency = args.delete(CONSISTENCY) if args[CONSISTENCY] replicaId = args.delete(REGION_REPLICA_ID) if args[REGION_REPLICA_ID] + converter = args.delete(FORMATTER) || nil + converter_class = args.delete(FORMATTER_CLASS) || nil unless args.empty? columns = args[COLUMN] || args[COLUMNS] if args[VERSIONS] @@ -419,13 +421,13 @@ EOF # Print out results. Result can be Cell or RowResult. 
res = {} result.listCells.each do |c| - family = org.apache.hadoop.hbase.util.Bytes::toStringBinary(c.getFamilyArray, - c.getFamilyOffset, c.getFamilyLength) - qualifier = org.apache.hadoop.hbase.util.Bytes::toStringBinary(c.getQualifierArray, - c.getQualifierOffset, c.getQualifierLength) + family = convert_bytes_with_position(c.getFamilyArray, + c.getFamilyOffset, c.getFamilyLength, converter_class, converter) + qualifier = convert_bytes_with_position(c.getQualifierArray, + c.getQualifierOffset, c.getQualifierLength, converter_class, converter) column = "#{family}:#{qualifier}" - value = to_string(column, c, maxlength) + value = to_string(column, c, maxlength, converter_class, converter) if block_given? yield(column, value) @@ -544,6 +546,8 @@ EOF limit = args["LIMIT"] || -1 maxlength = args.delete("MAXLENGTH") || -1 + converter = args.delete(FORMATTER) || nil + converter_class = args.delete(FORMATTER_CLASS) || nil count = 0 res = {} @@ -555,17 +559,17 @@ EOF # Iterate results while iter.hasNext row = iter.next - key = org.apache.hadoop.hbase.util.Bytes::toStringBinary(row.getRow) + key = convert_bytes(row.getRow, converter_class, converter) is_stale |= row.isStale row.listCells.each do |c| - family = org.apache.hadoop.hbase.util.Bytes::toStringBinary(c.getFamilyArray, - c.getFamilyOffset, c.getFamilyLength) - qualifier = org.apache.hadoop.hbase.util.Bytes::toStringBinary(c.getQualifierArray, - c.getQualifierOffset, c.getQualifierLength) + family = convert_bytes_with_position(c.getFamilyArray, + c.getFamilyOffset, c.getFamilyLength, converter_class, converter) + qualifier = convert_bytes_with_position(c.getQualifierArray, + c.getQualifierOffset, c.getQualifierLength, converter_class, converter) column = "#{family}:#{qualifier}" - cell = to_string(column, c, maxlength) + cell = to_string(column, c, maxlength, converter_class, converter) if block_given? 
yield(key, "column=#{column}, #{cell}") @@ -693,7 +697,7 @@ EOF # Make a String of the passed kv # Intercept cells whose format we know such as the info:regioninfo in hbase:meta - def to_string(column, kv, maxlength = -1) + def to_string(column, kv, maxlength = -1, converter_class = nil, converter=nil) if is_meta_table? if column == 'info:regioninfo' or column == 'info:splitA' or column == 'info:splitB' hri = org.apache.hadoop.hbase.HRegionInfo.parseFromOrNull(kv.getValueArray, @@ -715,16 +719,16 @@ EOF if kv.isDelete val = "timestamp=#{kv.getTimestamp}, type=#{org.apache.hadoop.hbase.KeyValue::Type::codeToType(kv.getType)}" else - val = "timestamp=#{kv.getTimestamp}, value=#{convert(column, kv)}" + val = "timestamp=#{kv.getTimestamp}, value=#{convert(column, kv, converter_class, converter)}" end (maxlength != -1) ? val[0, maxlength] : val end - def convert(column, kv) + def convert(column, kv, converter_class='org.apache.hadoop.hbase.util.Bytes', converter='toStringBinary') #use org.apache.hadoop.hbase.util.Bytes as the default class - klazz_name = 'org.apache.hadoop.hbase.util.Bytes' + converter_class = 'org.apache.hadoop.hbase.util.Bytes' unless converter_class #use org.apache.hadoop.hbase.util.Bytes::toStringBinary as the default convertor - converter = 'toStringBinary' + converter = 'toStringBinary' unless converter if @converters.has_key?(column) # lookup the CONVERTER for certain column - "cf:qualifier" matches = /c\((.+)\)\.(.+)/.match(@converters[column]) @@ -737,8 +741,19 @@ EOF converter = matches[2] end end - method = eval(klazz_name).method(converter) - return method.call(org.apache.hadoop.hbase.CellUtil.cloneValue(kv)) # apply the converter + # apply the converter; klazz_name has no default any more, so fall back to converter_class (else FORMATTER_CLASS is ignored for values) + convert_bytes(org.apache.hadoop.hbase.CellUtil.cloneValue(kv), klazz_name || converter_class, converter) + end + + def convert_bytes(bytes, converter_class=nil, converter_method=nil) + convert_bytes_with_position(bytes, 0, bytes.length, converter_class, converter_method) + end + + def 
convert_bytes_with_position(bytes, offset, len, converter_class, converter_method) + # Avoid nil. NOTE(review): converter_class is handed to eval below and originates from shell arguments (FORMATTER_CLASS); confirm that is acceptable + converter_class = 'org.apache.hadoop.hbase.util.Bytes' unless converter_class + converter_method = 'toStringBinary' unless converter_method + eval(converter_class).method(converter_method).call(bytes, offset, len) end # if the column spec contains CONVERTER information, to get rid of :CONVERTER info from column pair. diff --git a/hbase-shell/src/main/ruby/hbase_constants.rb b/hbase-shell/src/main/ruby/hbase_constants.rb index 52819c0c21..7d6da9f06b 100644 --- a/hbase-shell/src/main/ruby/hbase_constants.rb +++ b/hbase-shell/src/main/ruby/hbase_constants.rb @@ -84,6 +84,8 @@ module HBaseConstants SERVER_NAME = 'SERVER_NAME' LOCALITY_THRESHOLD = 'LOCALITY_THRESHOLD' RESTORE_ACL = 'RESTORE_ACL' + FORMATTER = 'FORMATTER' + FORMATTER_CLASS = 'FORMATTER_CLASS' # Load constants from hbase java API def self.promote_constants(constants) diff --git a/hbase-shell/src/main/ruby/shell/commands/get.rb b/hbase-shell/src/main/ruby/shell/commands/get.rb index 8191c22cea..6b9ad424fa 100644 --- a/hbase-shell/src/main/ruby/shell/commands/get.rb +++ b/hbase-shell/src/main/ruby/shell/commands/get.rb @@ -53,8 +53,13 @@ Example formatting cf:qualifier1 and cf:qualifier2 both as Integers: hbase> get 't1', 'r1' {COLUMN => ['cf:qualifier1:toInt', 'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] } -Note that you can specify a FORMATTER by column only (cf:qualifier). You cannot specify -a FORMATTER for all columns of a column family. +Note that you can specify a FORMATTER by column only (cf:qualifier). You can set a +formatter for all columns (including, all key parts) using the "FORMATTER" +and "FORMATTER_CLASS" options. The default "FORMATTER_CLASS" is +"org.apache.hadoop.hbase.util.Bytes". 
+ + hbase> get 't1', 'r1', {FORMATTER => 'toString'} + hbase> get 't1', 'r1', {FORMATTER_CLASS => 'org.apache.hadoop.hbase.util.Bytes', FORMATTER => 'toString'} The same commands also can be run on a reference to a table (obtained via get_table or create_table). Suppose you had a reference t to table 't1', the corresponding commands diff --git a/hbase-shell/src/main/ruby/shell/commands/scan.rb b/hbase-shell/src/main/ruby/shell/commands/scan.rb index b3cc5c8e9d..dda9899048 100644 --- a/hbase-shell/src/main/ruby/shell/commands/scan.rb +++ b/hbase-shell/src/main/ruby/shell/commands/scan.rb @@ -83,8 +83,13 @@ Example formatting cf:qualifier1 and cf:qualifier2 both as Integers: hbase> scan 't1', {COLUMNS => ['cf:qualifier1:toInt', 'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] } -Note that you can specify a FORMATTER by column only (cf:qualifier). You cannot -specify a FORMATTER for all columns of a column family. +Note that you can specify a FORMATTER by column only (cf:qualifier). You can set a +formatter for all columns (including, all key parts) using the "FORMATTER" +and "FORMATTER_CLASS" options. The default "FORMATTER_CLASS" is +"org.apache.hadoop.hbase.util.Bytes". + + hbase> scan 't1', {FORMATTER => 'toString'} + hbase> scan 't1', {FORMATTER_CLASS => 'org.apache.hadoop.hbase.util.Bytes', FORMATTER => 'toString'} Scan can also be used directly from a table, by first getting a reference to a table, like such: diff --git a/hbase-shell/src/test/ruby/shell/converter_test.rb b/hbase-shell/src/test/ruby/shell/converter_test.rb new file mode 100644 index 0000000000..8b6079bcb3 --- /dev/null +++ b/hbase-shell/src/test/ruby/shell/converter_test.rb @@ -0,0 +1,157 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require 'hbase_constants' +require 'shell' + +include HBaseConstants + +module Hbase + class ConverterTest < Test::Unit::TestCase + include TestHelpers + + non_ascii_text = '⻆⻇' + non_ascii_row = '⻄' + non_ascii_family = 'ㄹ' + non_ascii_qualifier = '⻅' + non_ascii_column = "#{non_ascii_family}:#{non_ascii_qualifier}" + hex_text = '\xE2\xBB\x86\xE2\xBB\x87' + hex_row = '\xE2\xBB\x84' + hex_family = '\xE3\x84\xB9' + hex_qualifier = '\xE2\xBB\x85' + hex_column = "#{hex_family}:#{hex_qualifier}" + + def setup + setup_hbase + end + + def teardown + shutdown + end + + define_test 'Test scan for non-ascii data' do + table_name = 'scan-test' + create_test_table(table_name) + # Write a record + command(:put, table_name, 'r1', 'x:a', non_ascii_text) + output = capture_stdout{ command(:scan, table_name) } + # Raw non-ASCII value is not printed by default + assert(!output.include?(non_ascii_text)) + # Hex-encoded value is there by default (manually converted) + assert(output.include?(hex_text)) + + # Use the formatter method + output = capture_stdout{ command(:scan, table_name, {'FORMATTER'=>'toString'}) } + # Should have the non-ASCII characters + assert(output.include?(non_ascii_text)) + # Should not have hex-encoded string + assert(!output.include?(hex_text)) + + # Use the formatter method + class + output = capture_stdout{ command(:scan, table_name, {'FORMATTER'=>'toString', 'FORMATTER_CLASS' => 
'org.apache.hadoop.hbase.util.Bytes'}) } + # Should have the non-ASCII characters + assert(output.include?(non_ascii_text)) + # Should not have hex-encoded string + assert(!output.include?(hex_text)) + + command(:disable, table_name) + command(:drop, table_name) + command(:create, table_name, non_ascii_family) + + command(:put, table_name, non_ascii_row, non_ascii_column, non_ascii_text) + output = capture_stdout{ command(:scan, table_name) } + # By default, get hex-encoded data + assert(!output.include?(non_ascii_text)) + assert(!output.include?(non_ascii_row)) + assert(!output.include?(non_ascii_column)) + assert(output.include?(hex_text)) + assert(output.include?(hex_row)) + assert(output.include?(hex_column)) + + # Use the formatter method + output = capture_stdout{ command(:scan, table_name, {'FORMATTER'=>'toString'}) } + # With the formatter, row, column and value are printed decoded, not hex-escaped + assert(output.include?(non_ascii_text)) + assert(output.include?(non_ascii_row)) + assert(output.include?(non_ascii_column)) + assert(!output.include?(hex_text)) + assert(!output.include?(hex_row)) + assert(!output.include?(hex_column)) + + # Use the formatter method + class + output = capture_stdout{ command(:scan, table_name, {'FORMATTER'=>'toString', 'FORMATTER_CLASS' => 'org.apache.hadoop.hbase.util.Bytes'}) } + # With the formatter, row, column and value are printed decoded, not hex-escaped + assert(output.include?(non_ascii_text)) + assert(output.include?(non_ascii_row)) + assert(output.include?(non_ascii_column)) + assert(!output.include?(hex_text)) + assert(!output.include?(hex_row)) + assert(!output.include?(hex_column)) + end + + define_test 'Test get for non-ascii data' do + table_name = 'get-test' + create_test_table(table_name) + # Write a record + command(:put, table_name, 'r1', 'x:a', non_ascii_text) + output = capture_stdout{ command(:get, table_name, 'r1') } + # Raw non-ASCII value is not printed by default + assert(!output.include?(non_ascii_text)) + # Hex-encoded value is there by default (manually converted) + assert(output.include?(hex_text)) + + 
# use the formatter method + output = capture_stdout{ command(:get, table_name, 'r1', {'FORMATTER'=>'toString'}) } + # Should have the non-ASCII characters + assert(output.include?(non_ascii_text)) + # Should not have hex-encoded string + assert(!output.include?(hex_text)) + + # use the formatter method + class + output = capture_stdout{ command(:get, table_name, 'r1', {'FORMATTER'=>'toString', 'FORMATTER_CLASS' => 'org.apache.hadoop.hbase.util.Bytes'}) } + # Should have the non-ASCII characters + assert(output.include?(non_ascii_text)) + # Should not have hex-encoded string + assert(!output.include?(hex_text)) + + command(:disable, table_name) + command(:drop, table_name) + command(:create, table_name, non_ascii_family) + + # use no formatter (expect hex) + command(:put, table_name, non_ascii_row, non_ascii_column, non_ascii_text) + output = capture_stdout{ command(:get, table_name, non_ascii_row) } + assert(!output.include?(non_ascii_text)) + assert(!output.include?(non_ascii_column)) + assert(output.include?(hex_text)) + assert(output.include?(hex_column)) + + # use the formatter method + output = capture_stdout{ command(:get, table_name, non_ascii_row, {'FORMATTER'=>'toString'}) } + assert(output.include?(non_ascii_text)) + assert(output.include?(non_ascii_column)) + assert(!output.include?(hex_text)) + assert(!output.include?(hex_column)) + + # use the formatter method + class + output = capture_stdout{ command(:get, table_name, non_ascii_row, {'FORMATTER'=>'toString', 'FORMATTER_CLASS' => 'org.apache.hadoop.hbase.util.Bytes'}) } + assert(output.include?(non_ascii_text)) + assert(output.include?(non_ascii_column)) + assert(!output.include?(hex_text)) + assert(!output.include?(hex_column)) + end + end +end \ No newline at end of file -- 2.12.2