From 83f369ced4e0b29571dd518ca82bd869d6cc25af Mon Sep 17 00:00:00 2001 From: Peter Somogyi Date: Fri, 25 May 2018 15:03:17 +0200 Subject: [PATCH] HBASE-20592 Create a tool to verify tables do not have prefix tree encoding --- bin/hbase | 3 + .../tool/DataBlockEncodingValidator.java | 108 ++++++++++++++++++ .../_chapters/appendix_hfile_format.adoc | 2 +- src/main/asciidoc/_chapters/compression.adoc | 2 +- src/main/asciidoc/_chapters/ops_mgt.adoc | 14 +++ src/main/asciidoc/_chapters/upgrading.adoc | 4 + 6 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java diff --git a/bin/hbase b/bin/hbase index f1e2306cfb..ec2aed1f96 100755 --- a/bin/hbase +++ b/bin/hbase @@ -108,6 +108,7 @@ if [ $# = 0 ]; then echo " regionsplitter Run RegionSplitter tool" echo " rowcounter Run RowCounter tool" echo " cellcounter Run CellCounter tool" + echo " dbevalidator Run DataBlockEncodingValidator tool" echo " CLASSNAME Run the class named CLASSNAME" exit 1 fi @@ -471,6 +472,8 @@ elif [ "$COMMAND" = "rowcounter" ] ; then CLASS='org.apache.hadoop.hbase.mapreduce.RowCounter' elif [ "$COMMAND" = "cellcounter" ] ; then CLASS='org.apache.hadoop.hbase.mapreduce.CellCounter' +elif [ "$COMMAND" = "dbevalidator" ] ; then + CLASS='org.apache.hadoop.hbase.tool.DataBlockEncodingValidator' else CLASS=$COMMAND fi diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java new file mode 100644 index 0000000000..c299c637c2 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.tool; + +import java.util.List; + +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HBaseInterfaceAudience; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.Connection; +import org.apache.hadoop.hbase.client.ConnectionFactory; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * Tool to validate that there are no column families with PREFIX_TREE Data Block Encoding set + * for any table. Before upgrading to HBase 2.0+ the cluster cannot have this encoding and + * it must be converted to a supported one. 
+ * + * Usage: hbase dbevalidator + */ +@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS) +public class DataBlockEncodingValidator extends Configured implements Tool { + + private static final String NAME = "dbevalidator"; + private static final Logger LOG = LoggerFactory.getLogger(DataBlockEncodingValidator.class); + private static final byte[] DATA_BLOCK_ENCODING = Bytes.toBytes("DATA_BLOCK_ENCODING"); + + @Override + public int run(String[] args) throws Exception { + if (args.length >= 1) { + printUsage(); + return 1; + } + int incompatibilities = 0; + + LOG.info("Looking for incompatible Data Block Encodings"); + + try (Connection connection = ConnectionFactory.createConnection(getConf()); + Admin admin = connection.getAdmin()) { + List tableDescriptors = admin.listTableDescriptors(); + String encoding = ""; + for (TableDescriptor td : tableDescriptors) { + ColumnFamilyDescriptor[] columnFamilies = td.getColumnFamilies(); + for (ColumnFamilyDescriptor cfd : columnFamilies) { + try { + encoding = Bytes.toString(cfd.getValue(DATA_BLOCK_ENCODING)); + // IllegalArgumentException will be thrown if encoding is incompatible with 2.0 + DataBlockEncoding.valueOf(encoding); + } catch (IllegalArgumentException e) { + incompatibilities++; + LOG.warn("Incompatible DataBlockEncoding for table: {}, cf: {}, encoding: {}", + td.getTableName().getNameAsString(), cfd.getNameAsString(), encoding); + } + } + } + } + + if (incompatibilities > 0) { + LOG.warn("There are {} column families with Data Block Encodings that are not compatible " + + "with HBase 2.0+. 
Do not upgrade until these encodings are converted to a " + + "supported one.", incompatibilities); + LOG.warn("Check http://hbase.apache.org/book.html#upgrade2.0.prefix-tree.removed " + + "for instructions."); + return 1; + } + LOG.info("The used Data Block Encodings are compatible with HBase 2.0."); + return 0; + } + + private void printUsage() { + System.err.println("DataBlockEncodingValidator tool checks that there are no incompatible " + + "Data Block Encodings in the cluster with HBase 2.0+ version."); + System.err.println(); + System.err.println("Usage: hbase " + NAME); + } + + public static void main(String[] args) throws Exception { + int errCode = ToolRunner.run(HBaseConfiguration.create(), + new DataBlockEncodingValidator(), args); + System.exit(errCode); + } + +} diff --git a/src/main/asciidoc/_chapters/appendix_hfile_format.adoc b/src/main/asciidoc/_chapters/appendix_hfile_format.adoc index 20f46d35f9..0f37beb3c8 100644 --- a/src/main/asciidoc/_chapters/appendix_hfile_format.adoc +++ b/src/main/asciidoc/_chapters/appendix_hfile_format.adoc @@ -321,7 +321,7 @@ Version 3 added two additional pieces of information to the reserved keys in the When reading a Version 3 HFile the presence of `MAX_TAGS_LEN` is used to determine how to deserialize the cells within a data block. Therefore, consumers must read the file's info block prior to reading any data blocks. -When writing a Version 3 HFile, HBase will always include `MAX_TAGS_LEN ` when flushing the memstore to underlying filesystem and when using prefix tree encoding for data blocks, as described in <>. +When writing a Version 3 HFile, HBase will always include `MAX_TAGS_LEN` when flushing the memstore to underlying filesystem. When compacting extant files, the default writer will omit `MAX_TAGS_LEN` if all of the files selected do not themselves contain any cells with tags. 
diff --git a/src/main/asciidoc/_chapters/compression.adoc b/src/main/asciidoc/_chapters/compression.adoc index c89e30574e..b2ff5ce699 100644 --- a/src/main/asciidoc/_chapters/compression.adoc +++ b/src/main/asciidoc/_chapters/compression.adoc @@ -125,7 +125,7 @@ The compression or codec type to use depends on the characteristics of your data In general, you need to weigh your options between smaller size and faster compression/decompression. Following are some general guidelines, expanded from a discussion at link:http://search-hadoop.com/m/lL12B1PFVhp1[Documenting Guidance on compression and codecs]. * If you have long keys (compared to the values) or many columns, use a prefix encoder. - FAST_DIFF is recommended, as more testing is needed for Prefix Tree encoding. + FAST_DIFF is recommended. * If the values are large (and not precompressed, such as images), use a data block compressor. * Use GZIP for [firstterm]_cold data_, which is accessed infrequently. GZIP compression uses more CPU resources than Snappy or LZO, but provides a higher compression ratio. diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc index e15b73f865..b2813b134a 100644 --- a/src/main/asciidoc/_chapters/ops_mgt.adoc +++ b/src/main/asciidoc/_chapters/ops_mgt.adoc @@ -842,6 +842,20 @@ For general usage instructions, pass the `-h` option. The LoadTestTool has received many updates in recent HBase releases, including support for namespaces, support for tags, cell-level ACLS and visibility labels, testing security-related features, ability to specify the number of regions per server, tests for multi-get RPC calls, and tests relating to replication. +[[ops.dbevalidator]] +=== DataBlockEncoding validator + +HBase 2.0 removed `PREFIX_TREE` Data Block Encoding from column families. +To verify that none of the column families are using incompatible Data Block Encodings in the cluster run the following command. 
+ +[source, bash] +---- +$ ./bin/hbase dbevalidator +---- + +DataBlockEncodingValidator tool checks all column families and prints out any incompatibilities. +To change `PREFIX_TREE` encoding to a supported one, check <>. + [[ops.regionmgt]] == Region Management diff --git a/src/main/asciidoc/_chapters/upgrading.adoc b/src/main/asciidoc/_chapters/upgrading.adoc index da4430a39a..54575198b3 100644 --- a/src/main/asciidoc/_chapters/upgrading.adoc +++ b/src/main/asciidoc/_chapters/upgrading.adoc @@ -419,6 +419,10 @@ This feature was removed because it as not being actively maintained. If interes sweet facility which improved random read latencies at the expensive of slowed writes, write the HBase developers list at _dev at hbase dot apache dot org_. +The prefix-tree encoding needs to be removed from all tables before upgrading to HBase 2.0+. +To do so, first you need to change the encoding from PREFIX_TREE to something else that is supported in HBase 2.0. +After that you have to major compact the tables that were using PREFIX_TREE encoding before. +To check which column families are using incompatible data block encoding, you can use <>. [[upgrade2.0.metrics]] .Changed metrics -- 2.17.0