diff --git a/hbase-shell/src/main/ruby/hbase/table.rb b/hbase-shell/src/main/ruby/hbase/table.rb index 22bbcfe..c49ee89 100644 --- a/hbase-shell/src/main/ruby/hbase/table.rb +++ b/hbase-shell/src/main/ruby/hbase/table.rb @@ -347,6 +347,7 @@ authorizations = args[AUTHORIZATIONS] consistency = args.delete(CONSISTENCY) if args[CONSISTENCY] replicaId = args.delete(REGION_REPLICA_ID) if args[REGION_REPLICA_ID] + formatters = args[FORMATTER] unless args.empty? columns = args[COLUMN] || args[COLUMNS] if args[VERSIONS] @@ -362,7 +363,8 @@ unless columns.kind_of?(Array) raise ArgumentError, "Failed parse column argument type #{args.inspect}, #{args.class}" end - + set_formatter(formatters,columns) if formatters + # Get each column name and add it to the filter columns.each do |column| family, qualifier = parse_column_name(column.to_s) @@ -477,6 +479,7 @@ attributes = args[ATTRIBUTES] authorizations = args[AUTHORIZATIONS] consistency = args[CONSISTENCY] + formatters = args[FORMATTER] # Normalize column names columns = [columns] if columns.class == String limit = args["LIMIT"] || -1 @@ -495,6 +498,7 @@ # Clear converters from last scan. 
@converters.clear() + set_formatter(formatters,columns) if formatters columns.each do |c| family, qualifier = parse_column_name(c.to_s) @@ -595,6 +599,19 @@ oprattr.setAttribute(k.to_s, v.to_java_bytes) end end + + def set_formatter(formatters,columns) + raise(ArgumentError, "Formatters must be a Hash type or Array type") unless formatters.kind_of?(Hash) || formatters.kind_of?(Array) + if formatters.kind_of?(Array) + formatters.each_index do |idx| + @converters["#{columns[idx].to_s}"]="#{formatters[idx].to_s}" + end + else + for k,v in formatters + @converters["#{k.to_s}"] = "#{v.to_s}" + end + end + end def set_cell_permissions(op, permissions) raise(ArgumentError, "Permissions must be a Hash type") unless permissions.kind_of?(Hash) @@ -687,7 +704,6 @@ # Returns family and (when has it) qualifier for a column name def parse_column_name(column) split = org.apache.hadoop.hbase.KeyValue.parseColumn(column.to_java_bytes) - set_converter(split) if split.length > 1 return split[0], (split.length > 1) ? split[1] : nil end diff --git a/hbase-shell/src/main/ruby/hbase_constants.rb b/hbase-shell/src/main/ruby/hbase_constants.rb index c02d5c6..b17fbc2 100644 --- a/hbase-shell/src/main/ruby/hbase_constants.rb +++ b/hbase-shell/src/main/ruby/hbase_constants.rb @@ -81,6 +81,7 @@ NAMESPACES = 'NAMESPACES' CONFIG = 'CONFIG' DATA = 'DATA' + FORMATTER = 'FORMATTER' # Load constants from hbase java API def self.promote_constants(constants) diff --git a/hbase-shell/src/main/ruby/shell/commands/get.rb b/hbase-shell/src/main/ruby/shell/commands/get.rb index 8191c22..9527bcc 100644 --- a/hbase-shell/src/main/ruby/shell/commands/get.rb +++ b/hbase-shell/src/main/ruby/shell/commands/get.rb @@ -48,11 +48,23 @@ 1. either as a org.apache.hadoop.hbase.util.Bytes method name (e.g, toInt, toString) 2. or as a custom class followed by method name: e.g. 'c(MyFormatterClass).format'. + +The custom formatting can be specified in two ways: + + 1. 
Specifying it for each column by column qualifier + 2. Without the column qualifier in which case the column qualifier will be derived from +COLUMNS specification and applied in the order they appear in COLUMNS specification. Example formatting cf:qualifier1 and cf:qualifier2 both as Integers: - hbase> get 't1', 'r1' {COLUMN => ['cf:qualifier1:toInt', - 'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] } + hbase> get 't1', 'r1' {COLUMN => ['cf:qualifier1', + 'cf:qualifier2'], FORMATTER => {'cf:qualifier1'=> 'toInt','cf:qualifier2'=> 'c(org.apache.hadoop.hbase.util.Bytes).toInt'} } + + or + + hbase> get 't1', 'r1' {COLUMN => ['cf:qualifier1', + 'cf:qualifier2'], FORMATTER => ['toInt','c(org.apache.hadoop.hbase.util.Bytes).toInt']} + Note that you can specify a FORMATTER by column only (cf:qualifier). You cannot specify a FORMATTER for all columns of a column family. diff --git a/hbase-shell/src/main/ruby/shell/commands/scan.rb b/hbase-shell/src/main/ruby/shell/commands/scan.rb index b3cc5c8..e578373 100644 --- a/hbase-shell/src/main/ruby/shell/commands/scan.rb +++ b/hbase-shell/src/main/ruby/shell/commands/scan.rb @@ -25,7 +25,7 @@ Scan a table; pass table name and optionally a dictionary of scanner specifications. Scanner specifications may include one or more of: TIMERANGE, FILTER, LIMIT, STARTROW, STOPROW, ROWPREFIXFILTER, TIMESTAMP, -MAXLENGTH or COLUMNS, CACHE or RAW, VERSIONS, ALL_METRICS or METRICS +MAXLENGTH or COLUMNS, CACHE or RAW, VERSIONS, ALL_METRICS or METRICS or FORMATTER If no columns are specified, all columns will be scanned. To scan all members of a column family, leave the qualifier empty as in @@ -79,12 +79,24 @@ 1. either as a org.apache.hadoop.hbase.util.Bytes method name (e.g, toInt, toString) 2. or as a custom class followed by method name: e.g. 'c(MyFormatterClass).format'. 
-Example formatting cf:qualifier1 and cf:qualifier2 both as Integers: - hbase> scan 't1', {COLUMNS => ['cf:qualifier1:toInt', - 'cf:qualifier2:c(org.apache.hadoop.hbase.util.Bytes).toInt'] } +The custom formatting can be specified in two ways: -Note that you can specify a FORMATTER by column only (cf:qualifier). You cannot -specify a FORMATTER for all columns of a column family. + 1. Specifying it for each column by column qualifier + 2. Without the column qualifier in which case the column qualifier will be derived from +COLUMNS specification and applied in the order they appear in COLUMNS specification. + +Example formatting cf:qualifier1 and cf:qualifier2 both as Integers: + + hbase> scan 't1', {COLUMNS => ['cf:qualifier1','cf:qualifier2'], + FORMATTER => {'cf:qualifier1'=> 'toInt','cf:qualifier2'=> 'c(org.apache.hadoop.hbase.util.Bytes).toInt'} } + + or + + hbase> scan 't1', {COLUMNS => ['cf:qualifier1','cf:qualifier2'], + FORMATTER => [ 'toInt','c(org.apache.hadoop.hbase.util.Bytes).toInt']} + +Note that you can specify a FORMATTER by column only (cf:qualifier). You cannot specify +a FORMATTER for all columns of a column family. 
Scan can also be used directly from a table, by first getting a reference to a table, like such: diff --git a/hbase-shell/src/test/ruby/hbase/table_test.rb b/hbase-shell/src/test/ruby/hbase/table_test.rb index faf9827..2e34e67 100644 --- a/hbase-shell/src/test/ruby/hbase/table_test.rb +++ b/hbase-shell/src/test/ruby/hbase/table_test.rb @@ -364,19 +364,43 @@ assert_equal(res.keys.sort, [ 'x:a', 'x:b' ]) end - define_test "get should support COLUMNS with value CONVERTER information" do + define_test "get should support COLUMNS with value CONVERTER information with column qualifier" do @test_table.put(1, "x:c", [1024].pack('N')) @test_table.put(1, "x:d", [98].pack('N')) + @test_table.put(1, "x:e:f", [200].pack('N')) + begin - res = @test_table._get_internal('1', ['x:c:toInt'], ['x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt']) + res = @test_table._get_internal('1', COLUMNS => ['x:c', 'x:d', 'x:e:f'],FORMATTER=>{'x:c'=>'toInt','x:d'=>'c(org.apache.hadoop.hbase.util.Bytes).toInt','x:e:f'=>'toInt'}) assert_not_nil(res) assert_kind_of(Hash, res) assert_not_nil(/value=1024/.match(res['x:c'])) assert_not_nil(/value=98/.match(res['x:d'])) + assert_not_nil(/value=200/.match(res['x:e:f'])) ensure # clean up newly added columns for this test only. 
@test_table.delete(1, "x:c") @test_table.delete(1, "x:d") + @test_table.delete(1, "x:e:f") + end + end + + define_test "get should support COLUMNS with value CONVERTER information without column qualifier" do + @test_table.put(1, "x:c", [1024].pack('N')) + @test_table.put(1, "x:d", [98].pack('N')) + @test_table.put(1, "x:e:f", [200].pack('N')) + + begin + res = @test_table._get_internal('1', COLUMNS => ['x:c', 'x:d', 'x:e:f'],FORMATTER=>['toInt','c(org.apache.hadoop.hbase.util.Bytes).toInt','toInt']) + assert_not_nil(res) + assert_kind_of(Hash, res) + assert_not_nil(/value=1024/.match(res['x:c'])) + assert_not_nil(/value=98/.match(res['x:d'])) + assert_not_nil(/value=200/.match(res['x:e:f'])) + ensure + # clean up newly added columns for this test only. + @test_table.delete(1, "x:c") + @test_table.delete(1, "x:d") + @test_table.delete(1, "x:e:f") end end @@ -564,22 +588,45 @@ assert_equal([rows.keys.size,false], res) end - define_test "scan should support COLUMNS with value CONVERTER information" do + define_test "scan should support COLUMNS with value CONVERTER information with the column qualifier" do @test_table.put(1, "x:c", [1024].pack('N')) @test_table.put(1, "x:d", [98].pack('N')) + @test_table.put(1,"x:e:f",[200].pack('N')) begin - res = @test_table._scan_internal COLUMNS => ['x:c:toInt', 'x:d:c(org.apache.hadoop.hbase.util.Bytes).toInt'] + res = @test_table._scan_internal COLUMNS => ['x:c', 'x:d', 'x:e:f'],FORMATTER=>{'x:c'=>'toInt','x:d'=>'c(org.apache.hadoop.hbase.util.Bytes).toInt','x:e:f'=>'toInt'} assert_not_nil(res) assert_kind_of(Hash, res) assert_not_nil(/value=1024/.match(res['1']['x:c'])) assert_not_nil(/value=98/.match(res['1']['x:d'])) + assert_not_nil(/value=200/.match(res['1']['x:e:f'])) ensure # clean up newly added columns for this test only. 
@test_table.delete(1, "x:c") @test_table.delete(1, "x:d") + @test_table.delete(1, "x:e:f") + end + end + + define_test "scan should support COLUMNS with value CONVERTER information without the column qualifier" do + @test_table.put(1, "x:c", [1024].pack('N')) + @test_table.put(1, "x:d", [98].pack('N')) + @test_table.put(1,"x:e:f",[200].pack('N')) + begin + res = @test_table._scan_internal COLUMNS => ['x:c', 'x:d', 'x:e:f'],FORMATTER=>['toInt','c(org.apache.hadoop.hbase.util.Bytes).toInt','toInt'] + assert_not_nil(res) + assert_kind_of(Hash, res) + assert_not_nil(/value=1024/.match(res['1']['x:c'])) + assert_not_nil(/value=98/.match(res['1']['x:d'])) + assert_not_nil(/value=200/.match(res['1']['x:e:f'])) + ensure + # clean up newly added columns for this test only. + @test_table.delete(1, "x:c") + @test_table.delete(1, "x:d") + @test_table.delete(1, "x:e:f") end end + define_test "scan should support FILTER" do @test_table.put(1, "x:v", "thisvalue") begin