From 685e70da6bf822118189a083c5ed877221349073 Mon Sep 17 00:00:00 2001 From: Sudeep Sunthankar Date: Wed, 18 Jan 2017 01:16:34 +1100 Subject: [PATCH] Cell Scanner and KeyValue Codec classes diff --git a/hbase-native-client/core/BUCK b/hbase-native-client/core/BUCK index 0d1bc93..42a7112 100644 --- a/hbase-native-client/core/BUCK +++ b/hbase-native-client/core/BUCK @@ -36,6 +36,8 @@ cxx_library( "request_converter.h", "response_converter.h", "table.h", + "cell_scanner.h", + "keyvalue_codec.h", ], srcs=[ "cell.cc", @@ -51,6 +53,8 @@ cxx_library( "request_converter.cc", "response_converter.cc", "table.cc", + "cell_scanner.cc", + "keyvalue_codec.cc", ], deps=[ "//connection:connection", diff --git a/hbase-native-client/core/cell_scanner.cc b/hbase-native-client/core/cell_scanner.cc new file mode 100644 index 0000000..b38a912 --- /dev/null +++ b/hbase-native-client/core/cell_scanner.cc @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ */
+
+#include "core/cell_scanner.h"
+// NOTE(review): in this copy of the patch the header name of the next #include and the
+// template arguments of std::shared_ptr are missing (angle-bracket text appears to have
+// been stripped). Restore them from the original commit; they are left untouched here.
+#include
+#include "core/keyvalue_codec.h"
+
+namespace hbase {
+
+/**
+ * Creates a scanner over a buffer of length-prefixed cells.
+ *
+ * @param cell_block buffer holding the encoded cells; the scanner keeps its own
+ *                   shared reference to it
+ * @param cell_block_length number of bytes of cell_block that may be read
+ */
+CellScanner::CellScanner(std::shared_ptr cell_block, uint32_t cell_block_length)
+    : cell_block_length_(cell_block_length), cell_block_(cell_block) {}
+
+CellScanner::~CellScanner() {}
+
+/**
+ * @return the block length given at construction, converted to int (lengths above
+ *         INT_MAX do not survive that conversion)
+ */
+int CellScanner::CellDataLength() const { return cell_block_length_; }
+
+/**
+ * Decodes the cell at the cursor and moves the cursor to the following cell.
+ *
+ * Each cell is laid out as a 4-byte big-endian size followed by that many bytes.
+ *
+ * @return true if a cell was decoded and is available through Current(); false once
+ *         the block is exhausted, truncated, or absent. After the first false every
+ *         later call returns false as well.
+ */
+bool CellScanner::Advance() {
+  if (beyond_block_data_) {
+    return false;
+  }
+
+  // Do the bounds arithmetic in 64 bits so that a huge size prefix cannot wrap around
+  // and slip past the checks below. A negative cursor becomes a huge value and is
+  // rejected by the same checks.
+  const uint64_t kSizePrefixBytes = 4;
+  const uint64_t block_end = cell_block_length_;
+  const uint64_t cell_start = static_cast<uint64_t>(cur_pos_);
+
+  // The size prefix itself has to fit in what is left of the block. This is also the
+  // regular end-of-data exit once the cursor has reached the end of the block.
+  if (cell_block_.get() == nullptr || cell_start + kSizePrefixBytes > block_end) {
+    beyond_block_data_ = true;
+    return false;
+  }
+
+  // Assemble the big-endian size byte by byte: the cursor is not necessarily 4-byte
+  // aligned, so the buffer must not be read through an integer pointer.
+  const unsigned char *prefix =
+      reinterpret_cast<const unsigned char *>(cell_block_.get()) + cell_start;
+  const uint32_t cell_size = (static_cast<uint32_t>(prefix[0]) << 24) |
+                             (static_cast<uint32_t>(prefix[1]) << 16) |
+                             (static_cast<uint32_t>(prefix[2]) << 8) |
+                             static_cast<uint32_t>(prefix[3]);
+
+  const uint64_t cell_end = cell_start + kSizePrefixBytes + cell_size;
+  if (cell_end > block_end) {
+    beyond_block_data_ = true;
+    return false;
+  }
+
+  // Decode() takes a const char *, so the buffer pointer is passed as it is.
+  KeyValueCodec kv_codec;
+  current_cell_.reset(kv_codec.Decode(cell_block_.get(), cur_pos_));
+  if (!current_cell_) {
+    // Never report success without a cell to hand out.
+    beyond_block_data_ = true;
+    return false;
+  }
+
+  // NOTE(review): cur_pos_ is declared as int, so blocks larger than INT_MAX bytes
+  // cannot be walked to the end; the negative-cursor check above stops the scan.
+  cur_pos_ = static_cast<int>(cell_end);
+  ++cells_read_;
+  return true;
+}
+
+/**
+ * @return the cell produced by the most recent successful Advance(); empty if
+ *         Advance() has not succeeded yet
+ */
+std::shared_ptr CellScanner::Current() { return current_cell_; }
+
+} /* namespace hbase */
diff --git a/hbase-native-client/core/cell_scanner.h b/hbase-native-client/core/cell_scanner.h
new file mode 100644
index 0000000..425e0ac
--- /dev/null
+++ b/hbase-native-client/core/cell_scanner.h
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#pragma once
+
+// NOTE(review): in this copy of the patch the header name of the next #include and the
+// template arguments of every std::shared_ptr below are missing (angle-bracket text
+// appears to have been stripped). Restore them from the original commit.
+#include
+#include "core/cell.h"
+
+namespace hbase {
+
+/**
+ * Walks a buffer of encoded cells one cell at a time: Advance() decodes the cell at
+ * the current position and Current() hands out the result.
+ */
+class CellScanner {
+ public:
+  /**
+   * @param cell_block buffer holding the encoded cells
+   * @param cell_block_length number of bytes in cell_block
+   */
+  CellScanner(std::shared_ptr cell_block, uint32_t cell_block_length);
+  ~CellScanner();
+
+  /**
+   * @return the cell block length passed to the constructor, as an int
+   */
+  int CellDataLength() const;
+
+  /**
+   * Decodes the cell at the current position and moves past it.
+   *
+   * @return true if a cell was decoded; false once the next cell would extend past
+   *         the end of the block, and on every call after that
+   */
+  bool Advance();
+
+  /**
+   * @return the cell decoded by the last successful Advance(); empty before the
+   *         first successful Advance()
+   */
+  std::shared_ptr Current();
+
+ private:
+  // Number of bytes in cell_block_.
+  uint32_t cell_block_length_ = 0;
+  // Byte offset into cell_block_ at which the next cell starts.
+  int cur_pos_ = 0;
+  // Incremented on every successful Advance(); not read anywhere in this patch.
+  int32_t cells_read_ = 0;
+  // Set once Advance() has found that the next cell does not fit in the block.
+  bool beyond_block_data_ = false;
+
+  // Buffer being scanned.
+  std::shared_ptr cell_block_ = nullptr;
+  // Result of the last successful Advance().
+  std::shared_ptr current_cell_ = nullptr;
+};
+
+} /* namespace hbase */
diff --git a/hbase-native-client/core/keyvalue_codec.cc b/hbase-native-client/core/keyvalue_codec.cc
new file mode 100644
index 0000000..ec7ff28
--- /dev/null
+++ b/hbase-native-client/core/keyvalue_codec.cc
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "core/keyvalue_codec.h"
+// NOTE(review): the header names of the next two #include lines are missing from this
+// copy of the patch (angle-bracket text appears to have been stripped). Restore them
+// from the original commit; they are left untouched here.
+#include
+#include
+
+namespace hbase {
+
+namespace {
+
+/**
+ * Reads a big-endian unsigned integer of `width` bytes (at most 8) starting at `bytes`.
+ *
+ * Assembling the value byte by byte works at any alignment, unlike loading it through
+ * a pointer cast to an integer type.
+ */
+uint64_t ReadBigEndian(const char *bytes, int width) {
+  uint64_t result = 0;
+  for (int i = 0; i < width; ++i) {
+    result = (result << 8) | static_cast<unsigned char>(bytes[i]);
+  }
+  return result;
+}
+
+}  // namespace
+
+KeyValueCodec::KeyValueCodec() {}
+
+KeyValueCodec::~KeyValueCodec() {}
+
+/**
+ * Decodes the cell that starts at cell_data[start_pos].
+ *
+ * Layout read by this function (multi-byte integers are big-endian):
+ *
+ *   4 bytes  cell size (counts everything after this field)
+ *   4 bytes  key length
+ *   4 bytes  value length
+ *   2 bytes  row length, followed by the row
+ *   1 byte   column family length, followed by the column family
+ *            column qualifier (whatever is left of the key before the timestamp)
+ *   8 bytes  timestamp
+ *   1 byte   key type
+ *            value
+ *
+ * The caller must guarantee that the 4-byte cell size and the bytes it announces lie
+ * inside the buffer; this function has no other way to learn the buffer's length.
+ *
+ * @param cell_data buffer containing the encoded cell
+ * @param start_pos offset of the cell's size field within cell_data
+ * @return a Cell allocated with new, which the caller must take ownership of, or
+ *         nullptr if cell_data is null, start_pos is negative, or the lengths stored
+ *         in the cell contradict each other
+ */
+Cell *KeyValueCodec::Decode(const char *cell_data, int start_pos) {
+  if (cell_data == nullptr || start_pos < 0) {
+    return nullptr;
+  }
+
+  // Walk the cell with a pointer and keep every length in 64 bits, so that no offset
+  // computation can overflow an int.
+  const char *cursor = cell_data + start_pos;
+
+  const uint64_t cell_size = ReadBigEndian(cursor, kHBaseSizeOfInt_);
+  cursor += kHBaseSizeOfInt_;
+  // The cell must at least hold the key length and value length fields.
+  if (cell_size < static_cast<uint64_t>(kHBaseSizeOfKeyValueInfrastructure_)) {
+    return nullptr;
+  }
+
+  const uint64_t key_length = ReadBigEndian(cursor, kHBaseSizeOfKeyLength_);
+  cursor += kHBaseSizeOfKeyLength_;
+  const uint64_t value_length = ReadBigEndian(cursor, kHBaseSizeOfValueLength_);
+  cursor += kHBaseSizeOfValueLength_;
+
+  // Key and value have to fit inside the announced cell, and the key has to be long
+  // enough for its fixed-size fields (row length, family length, timestamp, key type).
+  if (kHBaseSizeOfKeyValueInfrastructure_ + key_length + value_length > cell_size ||
+      key_length < static_cast<uint64_t>(kHBaseSizeOfKeyInfrastructure_)) {
+    return nullptr;
+  }
+
+  // Row: 2-byte length followed by the row bytes.
+  const uint64_t row_length = ReadBigEndian(cursor, kHBaseSizeOfRowLength_);
+  cursor += kHBaseSizeOfRowLength_;
+  if (row_length + kHBaseSizeOfKeyInfrastructure_ > key_length) {
+    return nullptr;
+  }
+  const std::string row(cursor, row_length);
+  cursor += row_length;
+
+  // Column family: 1-byte length followed by the family bytes.
+  const uint64_t column_family_length = ReadBigEndian(cursor, kHBaseSizeOfFamilyLength_);
+  cursor += kHBaseSizeOfFamilyLength_;
+  const uint64_t key_bytes_accounted_for =
+      row_length + column_family_length + kHBaseSizeOfKeyInfrastructure_;
+  if (key_bytes_accounted_for > key_length) {
+    // Without this check the qualifier length below would wrap around.
+    return nullptr;
+  }
+  const std::string column_family(cursor, column_family_length);
+  cursor += column_family_length;
+
+  // Column qualifier: has no length field of its own, it is the rest of the key.
+  const uint64_t column_qualifier_length = key_length - key_bytes_accounted_for;
+  const std::string column_qualifier(cursor, column_qualifier_length);
+  cursor += column_qualifier_length;
+
+  // Timestamp (8 bytes) and key type (1 byte) close the key.
+  const uint64_t timestamp = ReadBigEndian(cursor, kHBaseSizeOfTimestamp_);
+  cursor += kHBaseSizeOfTimestamp_;
+  const unsigned char key_type = static_cast<unsigned char>(*cursor);
+  cursor += kHBaseSizeOfKeyType_;
+
+  // The value follows the key directly.
+  const std::string value(cursor, value_length);
+
+  // NOTE(review): the target type of the static_cast below is missing from this copy
+  // of the patch (presumably the cell type declared in core/cell.h) -- restore it from
+  // the original commit.
+  return new Cell(row, column_family, column_qualifier, timestamp, value,
+                  static_cast(key_type));
+}
+
+} /* namespace hbase */
diff --git a/hbase-native-client/core/keyvalue_codec.h b/hbase-native-client/core/keyvalue_codec.h
new file mode 100644
index 0000000..c71b85d
--- /dev/null
+++ b/hbase-native-client/core/keyvalue_codec.h
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#pragma once
+
+#include "core/cell.h"
+
+namespace hbase {
+
+/**
+ * Decodes cells from their serialized key/value form.
+ *
+ * The size constants below describe the widths of the serialized fields. They are
+ * fixed numbers rather than sizeof() of host types so that decoding does not depend on
+ * the width of int on the build platform, and they are static so that they take no
+ * space in each KeyValueCodec object.
+ */
+class KeyValueCodec {
+ public:
+  KeyValueCodec();
+  ~KeyValueCodec();
+
+  /**
+   * Decodes the cell whose size field starts at cell_data[start_pos].
+   *
+   * @param cell_data buffer containing the encoded cell
+   * @param start_pos offset of the cell within cell_data
+   * @return a Cell allocated with new; the caller must take ownership of it
+   */
+  Cell* Decode(const char* cell_data, int start_pos);
+
+ private:
+  /**
+   * Size of boolean in bytes
+   */
+  static constexpr int kHBaseSizeOfBoolean_ = 1;
+
+  /**
+   * Size of byte in bytes
+   */
+  static constexpr int kHBaseSizeOfByte_ = kHBaseSizeOfBoolean_;
+
+  /**
+   * Size of int in bytes
+   */
+  static constexpr int kHBaseSizeOfInt_ = 4;
+
+  /**
+   * Size of long in bytes
+   */
+  static constexpr int kHBaseSizeOfLong_ = 8;
+
+  /**
+   * Size of short in bytes
+   */
+  static constexpr int kHBaseSizeOfShort_ = 2;
+
+  /** Width of the key length field */
+  static constexpr int kHBaseSizeOfKeyLength_ = kHBaseSizeOfInt_;
+  /** Width of the value length field */
+  static constexpr int kHBaseSizeOfValueLength_ = kHBaseSizeOfInt_;
+  /** Width of the row length field */
+  static constexpr int kHBaseSizeOfRowLength_ = kHBaseSizeOfShort_;
+  /** Width of the column family length field */
+  static constexpr int kHBaseSizeOfFamilyLength_ = kHBaseSizeOfByte_;
+  /** Width of the timestamp */
+  static constexpr int kHBaseSizeOfTimestamp_ = kHBaseSizeOfLong_;
+  /** Width of the key type */
+  static constexpr int kHBaseSizeOfKeyType_ = kHBaseSizeOfByte_;
+  /** Timestamp and key type together, i.e. the fixed-size tail of a key */
+  static constexpr int kHBaseSizeOfTimestampAndKey_ =
+      kHBaseSizeOfTimestamp_ + kHBaseSizeOfKeyType_;
+  /** All fixed-size parts of a key: row length, family length, timestamp, key type */
+  static constexpr int kHBaseSizeOfKeyInfrastructure_ =
+      kHBaseSizeOfRowLength_ + kHBaseSizeOfFamilyLength_ + kHBaseSizeOfTimestampAndKey_;
+  /** The two length fields that precede the key: key length and value length */
+  static constexpr int kHBaseSizeOfKeyValueInfrastructure_ =
+      kHBaseSizeOfKeyLength_ + kHBaseSizeOfValueLength_;
+};
+
+} /* namespace hbase */
-- 
1.8.3.1