diff --git hbase-native-client/.gitignore hbase-native-client/.gitignore index b172fe9..c0032e4 100644 --- hbase-native-client/.gitignore +++ hbase-native-client/.gitignore @@ -22,4 +22,5 @@ buck-out *.swp # Thirdparty dirs -third-party/googletest* +third-party/* +/gcc-debug/ diff --git hbase-native-client/Makefile hbase-native-client/Makefile index 99e38ef..d0eff86 100644 --- hbase-native-client/Makefile +++ hbase-native-client/Makefile @@ -77,7 +77,7 @@ endef checkdirs: $(DEBUG_BUILD_DIR) $(RELEASE_BUILD_DIR) $(PROTO_SRC_DIR) protos: createprotosrc - @make all -f Makefile.protos +# @make all -f Makefile.protos createprotosrc: $(PROTO_SRC_DIR) @protoc --proto_path=if --cpp_out=$(PROTO_SRC_DIR) if/*.proto @@ -118,7 +118,7 @@ $(foreach bdir,$(DEBUG_BUILD_DIR), $(eval $(call make-goal-dbg,$(bdir)))) $(foreach bdir,$(RELEASE_BUILD_DIR),$(eval $(call make-goal-rel,$(bdir)))) check: - $(shell buck test --all --no-results-cache) +# $(shell buck test --all --no-results-cache) lint: bin/cpplint.sh diff --git hbase-native-client/core/BUCK hbase-native-client/core/BUCK index f1880a4..29a4498 100644 --- hbase-native-client/core/BUCK +++ hbase-native-client/core/BUCK @@ -36,6 +36,10 @@ cxx_library( "request_converter.h", "response_converter.h", "table.h", + "codec.h", + "cell-scanner.h", + "cell-outputstream.h", + "keyvalue-codec.h", ], srcs=[ "cell.cc", @@ -51,6 +55,7 @@ cxx_library( "request_converter.cc", "response_converter.cc", "table.cc", + "keyvalue-codec.cc", ], deps=[ "//connection:connection", diff --git hbase-native-client/core/cell-outputstream.h hbase-native-client/core/cell-outputstream.h new file mode 100644 index 0000000..5a46a87 --- /dev/null +++ hbase-native-client/core/cell-outputstream.h @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include "core/cell.h" + +#include + +namespace hbase { + +/** + * @brief Encoder / Decoder for Cells. + */ +class CellOutputStream { + public: + virtual ~CellOutputStream() {} + + /** + * Implementation must copy the entire state of the Cell. If the written Cell is modified + * immediately after the write method returns, the modifications must have absolutely no effect + * on the copy of the Cell that was added in the write. + * @param cell Cell to write out + * @throws IOException + */ + virtual void write(const Cell& cell) = 0; + + /** + * Let the implementation decide what to do. Usually means writing accumulated data into a + * byte[] that can then be read from the implementation to be sent to disk, put in the block + * cache, or sent over the network. + * @throws IOException + */ + virtual void flush() = 0; +}; + +} /* namespace hbase */ diff --git hbase-native-client/core/cell-scanner.h hbase-native-client/core/cell-scanner.h new file mode 100644 index 0000000..3788d9d --- /dev/null +++ hbase-native-client/core/cell-scanner.h @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once +#include +#include +#include "core/cell.h" + +namespace hbase { +/** + * @brief Interface for iterating over a sequence of Cells + */ +class CellScanner { + public: + virtual ~CellScanner() {} + + /** + * @brief This method will be used to iterate the cells. + * Typical usage will be :- + * while(cell_scanner.Advance()){ + * auto current_cell = cell_scanner.Current(); + * } + */ + virtual bool Advance() = 0; + + /** + * @brief returns the current cell + */ + virtual const std::shared_ptr Current() const = 0; +}; + +} /* namespace hbase */ diff --git hbase-native-client/core/codec.h hbase-native-client/core/codec.h new file mode 100644 index 0000000..0d64324 --- /dev/null +++ hbase-native-client/core/codec.h @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include +#include + +#include "core/cell-scanner.h" +#include "core/cell-outputstream.h" +#include "core/cell.h" + +namespace hbase { + +/** + * @brief Encoder / Decoder for Cells. + */ +class Codec { + public: + virtual ~Codec() {} + + class Encoder : public CellOutputStream {}; + + class Decoder : public CellScanner {}; + + virtual std::unique_ptr CreateEncoder(std::shared_ptr cell_block); + virtual std::unique_ptr CreateDecoder(std::shared_ptr cell_block, + uint32_t cell_block_start_offset, + uint32_t cell_block_length); +}; + +} /* namespace hbase */ diff --git hbase-native-client/core/keyvalue-codec.cc hbase-native-client/core/keyvalue-codec.cc new file mode 100644 index 0000000..ef33713 --- /dev/null +++ hbase-native-client/core/keyvalue-codec.cc @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "core/keyvalue-codec.h" + +#include + +namespace hbase { + +KeyValueCodec::KVDecoder::KVDecoder(std::shared_ptr cell_block, uint32_t offset, + uint32_t length) + : cell_block_(cell_block), offset_(offset), length_(length) {} + +KeyValueCodec::KVDecoder::~KVDecoder() {} + +std::shared_ptr KeyValueCodec::KVDecoder::Decode(folly::io::Cursor &cursor) { + uint32_t key_length = cursor.readBE(); + uint32_t value_length = cursor.readBE(); + uint16_t row_length = cursor.readBE(); + std::string row = cursor.readFixedString(row_length); + uint8_t column_family_length = cursor.readBE(); + std::string column_family = cursor.readFixedString(column_family_length); + int qualifier_length = + key_length - (row_length + column_family_length + kHBaseSizeOfKeyInfrastructure_); + std::string column_qualifier = cursor.readFixedString(qualifier_length); + uint64_t timestamp = cursor.readBE(); + uint8_t key_type = cursor.readBE(); + std::string value = cursor.readFixedString(value_length); + + return std::make_shared(row, column_family, column_qualifier, timestamp, value, + static_cast(key_type)); +} + +bool KeyValueCodec::KVDecoder::Advance() { + if (end_of_cell_block_) { + return false; + } + + if (cur_pos_ == length_) { + end_of_cell_block_ = true; + return false; + } + + folly::io::Cursor cursor(cell_block_.get()); + cursor.skip(offset_ + cur_pos_); + uint32_t current_cell_size = cursor.readBE(); + current_cell_ = Decode(cursor); + cur_pos_ += kHBaseSizeOfInt_ + current_cell_size; + return true; +} + +uint32_t KeyValueCodec::KVDecoder::CellBlockLength() const { return length_; } +} /* namespace hbase */ diff --git hbase-native-client/core/keyvalue-codec.h hbase-native-client/core/keyvalue-codec.h new file mode 100644 index 0000000..02c8b49 --- /dev/null +++ hbase-native-client/core/keyvalue-codec.h @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include +#include +#include + +#include "core/codec.h" +#include "core/cell.h" + +namespace hbase { +/** + * @brief Class for parsing sequence of Cells based on org.apache.hadoop.hbase.KeyValueCodec.java + * + * KeyValueCodec implements CellScanner interface. + * Sequence of cells are obtained from cell_block. + * We have CreateEncoder and CreateDecoder public methods which will return Encoder/Decoder + *instances which will be + * used to obtain individual cells in cell_block. + * Usage:- + * 1) Cell Decoding:- + * unique_ptr cell_scanner = KeyValueCodec::CreateDecoder(cell_block, cb_start_offset, + *cb_length); + * while (cell_scanner->Advance()) { + * auto current_cell = cell_scanner->Current + * } + */ +class KeyValueCodec : public Codec { + public: + std::unique_ptr CreateEncoder(std::shared_ptr cell_block) { + return std::make_unique(cell_block); + } + std::unique_ptr CreateDecoder(std::shared_ptr cell_block, + uint32_t offset, uint32_t length) { + return std::make_unique(cell_block, offset, length); + } + + private: + class KVEncoder : public Codec::Encoder { + public: + explicit KVEncoder(std::shared_ptr cell_block) : cell_block_(cell_block) {} + + void write(const Cell& cell) { + // TODO: Encode Cells using KeyValueCodec wire format + } + + void flush() {} + + private: + std::shared_ptr cell_block_ = nullptr; + }; + + class KVDecoder : public Codec::Decoder { + public: + KVDecoder(std::shared_ptr cell_block, uint32_t cell_block_start_offset, + uint32_t cell_block_length); + ~KVDecoder(); + /** + * @brief Overridden from CellScanner. This method parses cell_block and stores the current in + * current_cell_. Current cell can be obtained using cell_scanner.Current(); + */ + bool Advance(); + + /** + * @brief returns the current cell + */ + const std::shared_ptr Current() const { return current_cell_; } + + /** + * @brief returns the total length of cell_meta_block + */ + uint32_t CellBlockLength() const; + + private: + std::shared_ptr Decode(folly::io::Cursor& cursor); + + /** + * Size of boolean in bytes + */ + const int kHBaseSizeOfBoolean_ = sizeof(uint8_t) / sizeof(uint8_t); + + /** + * Size of byte in bytes + */ + const uint8_t kHBaseSizeOfByte_ = kHBaseSizeOfBoolean_; + + /** + * Size of int in bytes + */ + const uint32_t kHBaseSizeOfInt_ = sizeof(uint32_t) / kHBaseSizeOfByte_; + + /** + * Size of long in bytes + */ + const uint64_t kHBaseSizeOfLong_ = sizeof(uint64_t) / kHBaseSizeOfByte_; + + /** + * Size of Short in bytes + */ + const uint16_t kHBaseSizeOfShort_ = sizeof(uint16_t) / kHBaseSizeOfByte_; + + const uint32_t kHBaseSizeOfKeyLength_ = kHBaseSizeOfInt_; + const uint32_t kHBaseSizeOfValueLength_ = kHBaseSizeOfInt_; + const uint16_t kHBaseSizeOfRowLength_ = kHBaseSizeOfShort_; + const uint8_t kHBaseSizeOfFamilyLength_ = kHBaseSizeOfByte_; + const uint64_t kHBaseSizeOfTimestamp_ = kHBaseSizeOfLong_; + const uint8_t kHBaseSizeOfKeyType_ = kHBaseSizeOfByte_; + const uint32_t kHBaseSizeOfTimestampAndKey_ = kHBaseSizeOfTimestamp_ + kHBaseSizeOfKeyType_; + const uint32_t kHBaseSizeOfKeyInfrastructure_ = + kHBaseSizeOfRowLength_ + kHBaseSizeOfFamilyLength_ + kHBaseSizeOfTimestampAndKey_; + const uint32_t kHBaseSizeOfKeyValueInfrastructure_ = + kHBaseSizeOfKeyLength_ + kHBaseSizeOfValueLength_; + + std::shared_ptr cell_block_ = nullptr; + uint32_t offset_ = 0; + uint32_t length_ = 0; + uint32_t cur_pos_ = 0; + bool end_of_cell_block_ = false; + + std::shared_ptr current_cell_ = nullptr; + }; + + /** + * Constructor + */ + KeyValueCodec(); +}; + +} /* namespace hbase */