From 568d8352d8978dcdb9b1d985630bb43b6790fa34 Mon Sep 17 00:00:00 2001 From: Sudeep Sunthankar Date: Mon, 23 Jan 2017 23:00:26 +1100 Subject: [PATCH] 1) Unnecessary data members moved from CellScanner Interface to KeyValueCodec implementation. 2) KeyValueCodec will be instantiated using KeyValueCodec::Encoder() and KeyValueCodec::Decoder() methods. diff --git a/hbase-native-client/core/BUCK b/hbase-native-client/core/BUCK index 0d1bc93..fd0a185 100644 --- a/hbase-native-client/core/BUCK +++ b/hbase-native-client/core/BUCK @@ -36,6 +36,8 @@ cxx_library( "request_converter.h", "response_converter.h", "table.h", + "cell_scanner.h", + "keyvalue_codec.h", ], srcs=[ "cell.cc", @@ -51,6 +53,7 @@ cxx_library( "request_converter.cc", "response_converter.cc", "table.cc", + "keyvalue_codec.cc", ], deps=[ "//connection:connection", diff --git a/hbase-native-client/core/cell_scanner.h b/hbase-native-client/core/cell_scanner.h new file mode 100644 index 0000000..fdb56b7 --- /dev/null +++ b/hbase-native-client/core/cell_scanner.h @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#pragma once +#include +#include +#include "core/cell.h" + +namespace hbase { +/** + * @brief Interface for iterating over a sequence of Cells + * + * Codec classes derive from this interface to decode the cells present in a cell block. + * Advance() steps to the next cell; Current() returns the pointer held in current_cell_. + */ +class CellScanner { + public: + virtual ~CellScanner() {} + + /** + * @brief Steps to the next cell. Pure virtual here; the implementation is supplied by the Codec + * classes. The loop below relies on it returning false once no further cell is available. + * Typical usage: + * while (cell_scanner.Advance()) { + * auto current_cell = cell_scanner.Current(); + * } + */ + virtual bool Advance() = 0; + + /** + * @brief Returns the pointer held in current_cell_; it stays nullptr until a codec assigns a cell + */ + const std::shared_ptr& Current() const { return current_cell_; } + + protected: + /** + * Constructor. Protected, so it is reachable only from derived classes; stores cell_block. + */ + explicit CellScanner(std::shared_ptr cell_block) : cell_block_(cell_block) {} + + std::shared_ptr cell_block_ = nullptr; /* cell block handed to the constructor */ + std::shared_ptr current_cell_ = nullptr; /* what Current() returns */ +}; + +} /* namespace hbase */ diff --git a/hbase-native-client/core/keyvalue_codec.cc b/hbase-native-client/core/keyvalue_codec.cc new file mode 100644 index 0000000..b218479 --- /dev/null +++ b/hbase-native-client/core/keyvalue_codec.cc @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "core/keyvalue_codec.h" +#include + +namespace hbase { + +KeyValueCodec::KeyValueCodec(std::shared_ptr cell_block, + uint32_t cell_block_start_offset, uint32_t cell_block_length) + : CellScanner(cell_block), + cell_block_start_offset_(cell_block_start_offset), + cell_block_length_(cell_block_length) {} /* passes cell_block to CellScanner and records the start offset and length */ + +KeyValueCodec::~KeyValueCodec() {} + +KeyValueCodec *KeyValueCodec::Decoder(std::shared_ptr cell_block, + uint32_t cell_block_start_offset, + uint32_t cell_block_length) { /* factory wrapping the private constructor */ + return new KeyValueCodec(cell_block, cell_block_start_offset, cell_block_length); /* NOTE(review): raw new handed to the caller, who presumably has to delete it - confirm */ +} + +Cell *KeyValueCodec::Decode(folly::io::Cursor &cursor) { /* reads one cell field by field, in the order of the statements below */ + uint32_t key_length = cursor.readBE(); + uint32_t value_length = cursor.readBE(); + uint16_t row_length = cursor.readBE(); + std::string row = cursor.readFixedString(row_length); + uint8_t column_family_length = cursor.readBE(); + std::string column_family = cursor.readFixedString(column_family_length); + int qualifier_length = + key_length - (row_length + column_family_length + kHBaseSizeOfKeyInfrastructure_); /* the qualifier length is not read from the buffer: it is what is left of key_length. NOTE(review): stored in a signed int and not range-checked */ + std::string column_qualifier = cursor.readFixedString(qualifier_length); + uint64_t timestamp = cursor.readBE(); + uint8_t key_type = cursor.readBE(); + std::string value = cursor.readFixedString(value_length); + + return new Cell(row, column_family, column_qualifier, timestamp, value, + static_cast(key_type)); /* Advance() hands this pointer to current_cell_.reset() */ +} + +bool KeyValueCodec::Advance() { /* decodes the cell at cur_pos_ into current_cell_; returns false once the end of the block has been seen */ + if (end_of_cell_block_) { + return false; + } + + if (cur_pos_ == cell_block_length_) { /* NOTE(review): exact-equality test; if the cell sizes ever carry cur_pos_ past cell_block_length_ this is never true - confirm the sizes always sum exactly */ + end_of_cell_block_ = true; + return false; + } + + folly::io::Cursor cursor(cell_block_.get()); /* a new cursor is built on every call and skipped forward to the current cell */ + cursor.skip(cell_block_start_offset_ + cur_pos_); + uint32_t current_cell_size = cursor.readBE(); /* size prefix of the cell, consumed before Decode() runs */ + current_cell_.reset(Decode(cursor)); + cur_pos_ += kHBaseSizeOfInt_ + current_cell_size; /* step over the size prefix and the cell itself */ + return true; +} + +uint32_t KeyValueCodec::CellBlockLength() const { return 
+ cell_block_length_; } +} /* namespace hbase */ diff --git a/hbase-native-client/core/keyvalue_codec.h b/hbase-native-client/core/keyvalue_codec.h new file mode 100644 index 0000000..b9d6854 --- /dev/null +++ b/hbase-native-client/core/keyvalue_codec.h @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include +#include +#include +#include "core/cell.h" +#include "core/cell_scanner.h" + +namespace hbase { +/** + * @brief Parses a sequence of Cells, based on org.apache.hadoop.hbase.KeyValueCodec.java + * + * KeyValueCodec implements the CellScanner interface. + * The sequence of cells is read from cell_block. + * The static Decoder() method returns a KeyValueCodec instance which is then used to obtain the + * individual cells in cell_block. Only Decoder() is declared in this class; it has no Encoder(). 
+ * Usage: + * 1) Cell Decoding: + * CellScanner *cell_scanner = KeyValueCodec::Decoder(cell_block, cb_start_offset, cb_length); + * while (cell_scanner->Advance()) { + * auto current_cell = cell_scanner->Current(); + * } + */ +class KeyValueCodec : public CellScanner { + public: + ~KeyValueCodec(); + + /** + * @brief Decoder method returns a new KeyValueCodec instance; call Advance() on it to decode the + * cells one at a time. NOTE(review): returned as a raw pointer - ownership is presumably the caller's + * @param cell_block Cell block to be decoded which is encoded as per KeyValueCodec.java + * @param cell_block_start_offset Offset from where we should start decoding of cell_block + * @param cell_block_length Length of the region to decode, counted from cell_block_start_offset + */ + static KeyValueCodec* Decoder(std::shared_ptr cell_block, + uint32_t cell_block_start_offset, uint32_t cell_block_length); + + /** + * @brief Overridden from CellScanner. Decodes the next cell of cell_block and stores it in + * current_cell_, which Current() returns. Returns false once the end of the block is reached. + */ + bool Advance(); /* NOTE(review): overrides CellScanner::Advance() but is not marked override */ + + /** + * @brief Returns cell_block_length_, i.e. the length that was passed to Decoder() + */ + uint32_t CellBlockLength() const; + + private: + /** + * Constructor. Private, so instances are created through Decoder(). + */ + KeyValueCodec(std::shared_ptr cell_block, uint32_t cell_block_start_offset, + uint32_t cell_block_length); + + Cell* Decode(folly::io::Cursor& cursor); /* decodes the single cell at cursor into a new Cell; called from Advance() */ + + /** + * Size of boolean in bytes (the expression below evaluates to 1) + */ + const int kHBaseSizeOfBoolean_ = sizeof(uint8_t) / sizeof(uint8_t); /* NOTE(review): this and the following constants are non-static const members, one copy per instance */ + + /** + * Size of byte in bytes + */ + const uint8_t kHBaseSizeOfByte_ = kHBaseSizeOfBoolean_; + + /** + * Size of int in bytes + */ + const uint32_t kHBaseSizeOfInt_ = sizeof(uint32_t) / kHBaseSizeOfByte_; + + /** + * Size of long in bytes + */ + const uint64_t kHBaseSizeOfLong_ = sizeof(uint64_t) / kHBaseSizeOfByte_; + + /** + * Size of short in bytes + */ + const uint16_t kHBaseSizeOfShort_ = sizeof(uint16_t) / kHBaseSizeOfByte_; + + const uint32_t kHBaseSizeOfKeyLength_ = kHBaseSizeOfInt_; + const uint32_t kHBaseSizeOfValueLength_ = kHBaseSizeOfInt_; + const uint16_t 
+ kHBaseSizeOfRowLength_ = kHBaseSizeOfShort_; + const uint8_t kHBaseSizeOfFamilyLength_ = kHBaseSizeOfByte_; + const uint64_t kHBaseSizeOfTimestamp_ = kHBaseSizeOfLong_; + const uint8_t kHBaseSizeOfKeyType_ = kHBaseSizeOfByte_; + const uint32_t kHBaseSizeOfTimestampAndKey_ = kHBaseSizeOfTimestamp_ + kHBaseSizeOfKeyType_; + const uint32_t kHBaseSizeOfKeyInfrastructure_ = + kHBaseSizeOfRowLength_ + kHBaseSizeOfFamilyLength_ + kHBaseSizeOfTimestampAndKey_; /* fixed part of a key: the row-length and family-length fields, the timestamp and the key type; Decode() subtracts it to find the qualifier length */ + const uint32_t kHBaseSizeOfKeyValueInfrastructure_ = + kHBaseSizeOfKeyLength_ + kHBaseSizeOfValueLength_; /* not referenced by keyvalue_codec.cc in this patch */ + + uint32_t cell_block_start_offset_ = 0; /* where decoding starts inside cell_block_ */ + uint32_t cell_block_length_ = 0; /* value of cur_pos_ at which Advance() stops */ + uint32_t cur_pos_ = 0; /* offset of the next cell, relative to cell_block_start_offset_ */ + bool end_of_cell_block_ = false; /* set by Advance() once cur_pos_ equals cell_block_length_ */ +}; + +} /* namespace hbase */ -- 1.8.3.1