From fc4e965c05c67317a2b275a6440be8c68c48ccda Mon Sep 17 00:00:00 2001 From: Sudeep Sunthankar Date: Thu, 8 Dec 2016 10:13:34 +1100 Subject: [PATCH] CellScanner first cut to be used by ResultScanner. diff --git a/hbase-native-client/core/cell.cc b/hbase-native-client/core/cell.cc index 5129bc9..c71b712 100644 --- a/hbase-native-client/core/cell.cc +++ b/hbase-native-client/core/cell.cc @@ -18,16 +18,19 @@ */ #include "core/cell.h" +#include #include - namespace hbase { -Cell::Cell(const std::string &row, const std::string &family, - const std::string &qualifier, const long ×tamp, - const std::string &value, const hbase::CellType &cell_type) - : row_(row), family_(family), qualifier_(qualifier), timestamp_(timestamp), - cell_type_(cell_type), value_(value), sequence_id_(0) { - +Cell::Cell(const std::string &row, const std::string &family, const std::string &qualifier, + const long ×tamp, const std::string &value, const hbase::CellType &cell_type) + : row_(row), + family_(family), + qualifier_(qualifier), + timestamp_(timestamp), + cell_type_(cell_type), + value_(value), + sequence_id_(0) { if (0 == row.size()) throw std::runtime_error("Row size should be greater than 0"); @@ -38,20 +41,147 @@ Cell::Cell(const std::string &row, const std::string &family, throw std::runtime_error("Timestamp should be greater than 0"); } -Cell::~Cell() {} +Cell::~Cell() { +} + +const std::string &Cell::Row() const { + return row_; +} + +const std::string &Cell::Family() const { + return family_; +} + +const std::string &Cell::Qualifier() const { + return qualifier_; +} + +unsigned long Cell::Timestamp() const { + return timestamp_; +} + +const std::string &Cell::Value() const { + return value_; +} + +hbase::CellType Cell::Type() const { + return cell_type_; +} + +long Cell::SequenceId() const { + return sequence_id_; +} -const std::string &Cell::Row() const { return row_; } +Cell *Cell::ParseCellData(const std::string &cell_data) { + DLOG(INFO)<< "cell_data.size() = " << cell_data.size(); -const std::string &Cell::Family() const { return family_; } + int offset = 0; + unsigned int cell_size_length; + unsigned int *pSize = (unsigned int *)&cell_data[offset]; // pCurrent; + cell_size_length = *pSize; + SwapByteOrder(cell_size_length); -const std::string &Cell::Qualifier() const { return qualifier_; } + // Key length offset starts from kHBaseSizeOfInt and not 0 coz @ o we have + // cell_size_length + int key_length_offset = kHBaseSizeOfInt; + pSize = (unsigned int *)&cell_data[key_length_offset]; + unsigned int key_length = *pSize; + SwapByteOrder(key_length); -unsigned long Cell::Timestamp() const { return timestamp_; } + // Value offset is @ key_len_offset + sizeof(key_len) + int value_length_offset = key_length_offset + kHBaseSizeOfKeyLength; + pSize = (unsigned int *)&cell_data[value_length_offset]; + unsigned int value_length = *pSize; + SwapByteOrder(value_length); -const std::string &Cell::Value() const { return value_; } + // Row length is @ kHBaseKeyValueInfrastructeSize + sizeof(cell_size_length) + int row_length_offset = kHBaseSizeOfKeyValueInfrastructure + kHBaseSizeOfInt; + int row_offset = row_length_offset + kHBaseSizeOfRowLength; + unsigned short *pRowLength = (unsigned short *)&cell_data[row_length_offset]; + unsigned short row_length = *pRowLength; + SwapByteOrder2Bytes(row_length); + std::string row(cell_data, row_offset, row_length); -hbase::CellType Cell::Type() const { return cell_type_; } + // Column family length is @ row_offset + row_length + int column_family_length_offset = row_offset + row_length; + int column_family_offset = + column_family_length_offset + kHBaseSizeOfFamilyLength; + unsigned char column_family_length = + cell_data[column_family_length_offset];// 1 byte + std::string column_family(cell_data, column_family_offset, + column_family_length); -long Cell::SequenceId() const { return sequence_id_; } + // Column Qualifier starts @ column_family_offset + column_family_lengthh + int column_qualifier_offset = column_family_offset + column_family_length; + int column_qualifier_length = + key_length - + (row_length + column_family_length + kHBaseSizeOfKeyInfrastructure); + std::string column_qualifier(cell_data, column_qualifier_offset, + column_qualifier_length); -} /* namespace hbase */ + // Timestamp starts @ column_qualifier_offset + column_qualifier_length + int timestamp_offset = column_qualifier_offset + column_qualifier_length; + unsigned long timestamp = 0L; + for (int i = timestamp_offset; i < (timestamp_offset + kHBaseSizeOfTimestamp); + i++) { + timestamp <<= 8; + timestamp ^= cell_data[i] & 0xFF; + } + + // Key Type starts @ timestamp_offset + kHBaseSizeOfTimestamp + int key_type_offset = timestamp_offset + kHBaseSizeOfTimestamp; + unsigned char key_type = + static_cast(cell_data[key_type_offset]);// 1 byte + + // Value starts @ key_type_offset + kHBaseSizeOfKeyType + int value_offset = key_type_offset + kHBaseSizeOfKeyType; + std::string value(cell_data, value_offset, value_length); +#if 0 + DLOG(INFO) << "cell_size_length:- " << cell_size_length; + DLOG(INFO) << "key_length:- " << key_length; + DLOG(INFO) << "value_length:- " << value_length; + + DLOG(INFO) << "row_length_offset:- " << row_length_offset; + DLOG(INFO) << "row_offset:- " << row_offset; + DLOG(INFO) << "row_length:- " << row_length; + DLOG(INFO) << "row:- " << row; + + DLOG(INFO) << "column_family_length_offset:- " << column_family_length_offset; + DLOG(INFO) << "column_family_offset:- " << column_family_offset; + DLOG(INFO) << "column_family_length:- " + << static_cast(column_family_length); + DLOG(INFO) << "column:- " << column_family; + + DLOG(INFO) << "column_qualifier_offset:- " << column_qualifier_offset; + DLOG(INFO) << "column_qualifier_length:- " << column_qualifier_length; + DLOG(INFO) << "column_qualifier:- " << column_qualifier; + + DLOG(INFO) << "timestamp_offset:- " << timestamp_offset; + DLOG(INFO) << "timestamp:- " << timestamp; + + DLOG(INFO) << "key_type_offset:- " << key_type_offset; + DLOG(INFO) << "key_type:- " << static_cast(key_type); + + DLOG(INFO) << "value_offset:- " << value_offset; + DLOG(INFO) << "value:- " << value; +#endif + return Cell::CreateCell(row, column_family, column_qualifier, timestamp, + value, static_cast(key_type)); +} + +Cell *Cell::CreateCell(const std::string &row, const std::string &family, + const std::string &qualifier, const long ×tamp, + const std::string &value, const hbase::CellType &cell_type) { + Cell *cell = new Cell(row, family, qualifier, timestamp, value, cell_type); + return cell; +} + +void Cell::SwapByteOrder(uint32_t &ui) { + ui = (ui >> 24) | ((ui << 8) & 0x00FF0000) | ((ui >> 8) & 0x0000FF00) | (ui << 24); +} + +void Cell::SwapByteOrder2Bytes(unsigned short &us) { + us = ((((us) >> 8) & 0x00FF) | (((us) << 8) & 0xFF00)); +} +} +/* namespace hbase */ diff --git a/hbase-native-client/core/cell.h b/hbase-native-client/core/cell.h index 2b15ad6..525801c 100644 --- a/hbase-native-client/core/cell.h +++ b/hbase-native-client/core/cell.h @@ -34,10 +34,9 @@ enum CellType { }; class Cell { -public: - Cell(const std::string &row, const std::string &family, - const std::string &qualifier, const long ×tamp, - const std::string &value, const hbase::CellType &cell_type); + public: + Cell(const std::string &row, const std::string &family, const std::string &qualifier, + const long ×tamp, const std::string &value, const hbase::CellType &cell_type); virtual ~Cell(); const std::string &Row() const; const std::string &Family() const; @@ -46,8 +45,12 @@ public: const std::string &Value() const; CellType Type() const; long SequenceId() const; + static Cell *ParseCellData(const std::string &cell_data); + static Cell *CreateCell(const std::string &row, const std::string &family, + const std::string &qualifier, const long ×tamp, + const std::string &value, const hbase::CellType &cell_type); -private: + private: std::string row_; std::string family_; std::string qualifier_; @@ -55,6 +58,46 @@ private: hbase::CellType cell_type_; std::string value_; long sequence_id_; + + /** + * Size of boolean in bytes + */ + static const int kHBaseSizeOfBoolean = sizeof(char) / sizeof(char); + + /** + * Size of byte in bytes + */ + static const int kHBaseSizeOfByte = kHBaseSizeOfBoolean; + + /** + * Size of int in bytes + */ + static const int kHBaseSizeOfInt = sizeof(int) / sizeof(char); + + /** + * Size of long in bytes + */ + static const int kHBaseSizeOfLong = sizeof(long) / sizeof(char); + + /** + * Size of Short in bytes + */ + static const int kHBaseSizeOfShort = sizeof(short) / sizeof(char); + + static const int kHBaseSizeOfKeyLength = kHBaseSizeOfInt; + static const int kHBaseSizeOfValueLength = kHBaseSizeOfInt; + static const int kHBaseSizeOfRowLength = kHBaseSizeOfShort; + static const int kHBaseSizeOfFamilyLength = kHBaseSizeOfByte; + static const int kHBaseSizeOfTimestamp = kHBaseSizeOfLong; + static const int kHBaseSizeOfKeyType = kHBaseSizeOfByte; + static const int kHBaseSizeOfTimestampAndKey = kHBaseSizeOfTimestamp + kHBaseSizeOfKeyType; + static const int kHBaseSizeOfKeyInfrastructure = kHBaseSizeOfRowLength + kHBaseSizeOfFamilyLength + + kHBaseSizeOfTimestampAndKey; + static const int kHBaseSizeOfKeyValueInfrastructure = kHBaseSizeOfKeyLength + + kHBaseSizeOfValueLength; + + static void SwapByteOrder(uint32_t &ui); + static void SwapByteOrder2Bytes(unsigned short &us); }; } /* namespace hbase */ diff --git a/hbase-native-client/core/cell_scanner.cc b/hbase-native-client/core/cell_scanner.cc new file mode 100644 index 0000000..188398c --- /dev/null +++ b/hbase-native-client/core/cell_scanner.cc @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "cell_scanner.h" + +#include +namespace hbase { + +CellScanner::CellScanner() { +} + +CellScanner::CellScanner(const CellScanner &cell_scanner) { + data_length_ = cell_scanner.data_length_; + cur_pos_ = 0; + cells_read_ = 0; + beyond_block_data_ = false; + cell_block_data_.reset(new char(*cell_scanner.cell_block_data_)); + current_cell_.reset(new Cell(*cell_scanner.current_cell_)); +} + +CellScanner::~CellScanner() { +} + +void CellScanner::SetData(char *cell_block_data, int data_length) { + + cell_block_data_.reset(new char[data_length]); + std::memcpy(cell_block_data_.get(), cell_block_data, data_length); + data_length_ = data_length; + cur_pos_ = 0; +} + +char *CellScanner::GetData() const { + return cell_block_data_.get(); +} + +int CellScanner::GetDataLength() const { + return data_length_; +} + +bool CellScanner::Advance() { + if (beyond_block_data_) { + return false; + } + + unsigned int *pSize = (unsigned int*) cell_block_data_.get(); + unsigned int cellSize = *pSize; + SwapByteOrder(cellSize); + int total_size = cellSize + 4; + if ((cur_pos_ + total_size) > data_length_) { + beyond_block_data_ = true; + return false; + } + + std::unique_ptr current_cell_data = std::make_unique < std::string + > (cell_block_data_.get(), total_size); + cur_pos_ += total_size; + cells_read_++; + + current_cell_.reset(new Cell(*Cell::ParseCellData(*current_cell_data))); + return true; +} + +Cell *CellScanner::Current() { + return current_cell_.get(); +} + +void CellScanner::SwapByteOrder(unsigned int &ui) { + ui = (ui >> 24) | ((ui << 8) & 0x00FF0000) | ((ui >> 8) & 0x0000FF00) | (ui << 24); +} + +} /* namespace hbase */ diff --git a/hbase-native-client/core/cell_scanner.h b/hbase-native-client/core/cell_scanner.h new file mode 100644 index 0000000..3acc91f --- /dev/null +++ b/hbase-native-client/core/cell_scanner.h @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include +#include "core/cell.h" + +namespace hbase { + +class CellScanner { + public: + CellScanner(); + CellScanner(const CellScanner &cell_scanner); + ~CellScanner(); + void SetData(char *cell_block_data, int data_length); + char *GetData() const; + int GetDataLength() const; + bool Advance(); + Cell *Current(); + + private: + int data_length_ = 0; + int cur_pos_ = 0; + long cells_read_ = 0; + bool beyond_block_data_ = false; + + std::unique_ptr cell_block_data_; + std::unique_ptr current_cell_; + + void SwapByteOrder(unsigned int &ui); +}; + +} /* namespace hbase */ -- 1.8.3.1