From 46444cff5c813cfb6a57f7ab8acd078c0a322c06 Mon Sep 17 00:00:00 2001
From: Peter Somogyi
Date: Fri, 25 May 2018 15:03:17 +0200
Subject: [PATCH] HBASE-20592 Create a tool to verify tables do not have prefix tree encoding

---
 bin/hbase                                     |   3 +
 .../hbase/tool/PreUpgradeValidator.java       | 139 ++++++++++++++++++
 .../_chapters/appendix_hfile_format.adoc      |   2 +-
 src/main/asciidoc/_chapters/compression.adoc  |   2 +-
 src/main/asciidoc/_chapters/ops_mgt.adoc      |  22 +++
 src/main/asciidoc/_chapters/upgrading.adoc    |   4 +
 6 files changed, 170 insertions(+), 2 deletions(-)
 create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java

diff --git a/bin/hbase b/bin/hbase
index f1e2306cfb..4f1c854dfa 100755
--- a/bin/hbase
+++ b/bin/hbase
@@ -108,6 +108,7 @@ if [ $# = 0 ]; then
   echo "  regionsplitter  Run RegionSplitter tool"
   echo "  rowcounter      Run RowCounter tool"
   echo "  cellcounter     Run CellCounter tool"
+  echo "  pre-upgrade     Run Pre-Upgrade validator tool"
   echo "  CLASSNAME       Run the class named CLASSNAME"
   exit 1
 fi
@@ -471,6 +472,8 @@ elif [ "$COMMAND" = "rowcounter" ] ; then
   CLASS='org.apache.hadoop.hbase.mapreduce.RowCounter'
 elif [ "$COMMAND" = "cellcounter" ] ; then
   CLASS='org.apache.hadoop.hbase.mapreduce.CellCounter'
+elif [ "$COMMAND" = "pre-upgrade" ] ; then
+  CLASS='org.apache.hadoop.hbase.tool.PreUpgradeValidator'
 else
   CLASS=$COMMAND
 fi
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
new file mode 100644
index 0000000000..56f00a5289
--- /dev/null
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
@@ -0,0 +1,139 @@
+/**
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.tool;
+
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.client.Admin;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
+import org.apache.hadoop.hbase.client.Connection;
+import org.apache.hadoop.hbase.client.ConnectionFactory;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
+import org.apache.hadoop.hbase.util.AbstractHBaseTool;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
+
+/**
+ * Tool for validating that cluster can be upgraded from HBase 1.x to 2.0
+ * <p>
+ * Available validations:
+ * <ul>
+ * <li>all: Run all pre-upgrade validations</li>
+ * <li>validateDBE: Check Data Block Encoding for column families</li>
+ * </ul>
+ */
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+public class PreUpgradeValidator extends AbstractHBaseTool {
+
+  public static final String NAME = "pre-upgrade";
+  private static final Logger LOG = LoggerFactory.getLogger(PreUpgradeValidator.class);
+  private static final byte[] DATA_BLOCK_ENCODING = Bytes.toBytes("DATA_BLOCK_ENCODING");
+  private boolean validateAll;
+  private boolean validateDBE;
+
+  /**
+   * Check DataBlockEncodings for column families.
+   *
+   * @return {@code true} if no incompatible DataBlockEncoding was found, {@code false}
+   *   otherwise
+   * @throws IOException if a remote or network exception occurs
+   */
+  private boolean validateDBE() throws IOException {
+    int incompatibilities = 0;
+
+    LOG.info("Validating Data Block Encodings");
+
+    try (Connection connection = ConnectionFactory.createConnection(getConf());
+        Admin admin = connection.getAdmin()) {
+      List<TableDescriptor> tableDescriptors = admin.listTableDescriptors();
+
+      for (TableDescriptor td : tableDescriptors) {
+        for (ColumnFamilyDescriptor cfd : td.getColumnFamilies()) {
+          String encoding = Bytes.toString(cfd.getValue(DATA_BLOCK_ENCODING));
+          if (encoding == null) {
+            // No encoding name is available for this family, so there is nothing to look up.
+            // Passing null on would make valueOf throw NullPointerException, which the
+            // catch below does not handle and which would abort the whole scan.
+            continue;
+          }
+          try {
+            // IllegalArgumentException will be thrown if encoding is incompatible with 2.0
+            DataBlockEncoding.valueOf(encoding);
+          } catch (IllegalArgumentException e) {
+            incompatibilities++;
+            LOG.warn("Incompatible DataBlockEncoding for table: {}, cf: {}, encoding: {}",
+                td.getTableName().getNameAsString(), cfd.getNameAsString(), encoding);
+          }
+        }
+      }
+    }
+
+    if (incompatibilities > 0) {
+      LOG.warn("There are {} column families with incompatible Data Block Encodings. Do not "
+          + "upgrade until these encodings are converted to a supported one.", incompatibilities);
+      LOG.warn("Check http://hbase.apache.org/book.html#upgrade2.0.prefix-tree.removed "
+          + "for instructions.");
+      return false;
+    } else {
+      LOG.info("The used Data Block Encodings are compatible with HBase 2.0.");
+      return true;
+    }
+  }
+
+  @Override
+  protected void addOptions() {
+    addOptNoArg("all", "Run all pre-upgrade validations");
+    addOptNoArg("validateDBE", "Validate DataBlockEncoding are compatible on the cluster");
+  }
+
+  @Override
+  protected void processOptions(CommandLine cmd) {
+    validateAll = cmd.hasOption("all");
+    validateDBE = cmd.hasOption("validateDBE");
+  }
+
+  /**
+   * Run the validations selected on the command line.
+   *
+   * @return 0 if every validation that ran passed, 1 if any of them failed
+   */
+  @Override
+  protected int doWork() throws Exception {
+    boolean validationFailed = false;
+    if (validateDBE || validateAll) {
+      // validateDBE() returns true when the encodings are compatible, so false is the failure
+      if (!validateDBE()) {
+        validationFailed = true;
+      }
+    }
+
+    return validationFailed ? 1 : 0;
+  }
+
+  public static void main(String[] args) {
+    new PreUpgradeValidator().doStaticMain(args);
+  }
+}
diff --git a/src/main/asciidoc/_chapters/appendix_hfile_format.adoc b/src/main/asciidoc/_chapters/appendix_hfile_format.adoc
index 20f46d35f9..0f37beb3c8 100644
--- a/src/main/asciidoc/_chapters/appendix_hfile_format.adoc
+++ b/src/main/asciidoc/_chapters/appendix_hfile_format.adoc
@@ -321,7 +321,7 @@ Version 3 added two additional pieces of information to the reserved keys in the
 When reading a Version 3 HFile the presence of `MAX_TAGS_LEN` is used to determine how to deserialize the cells within a data block.
 Therefore, consumers must read the file's info block prior to reading any data blocks.
 
-When writing a Version 3 HFile, HBase will always include `MAX_TAGS_LEN ` when flushing the memstore to underlying filesystem and when using prefix tree encoding for data blocks, as described in <<compression,compression>>.
+When writing a Version 3 HFile, HBase will always include `MAX_TAGS_LEN` when flushing the memstore to underlying filesystem.
 
 When compacting extant files, the default writer will omit `MAX_TAGS_LEN` if all of the files selected do not themselves contain any cells with tags.
 
diff --git a/src/main/asciidoc/_chapters/compression.adoc b/src/main/asciidoc/_chapters/compression.adoc
index c89e30574e..b2ff5ce699 100644
--- a/src/main/asciidoc/_chapters/compression.adoc
+++ b/src/main/asciidoc/_chapters/compression.adoc
@@ -125,7 +125,7 @@ The compression or codec type to use depends on the characteristics of your data
 In general, you need to weigh your options between smaller size and faster compression/decompression. Following are some general guidelines, expanded from a discussion at link:http://search-hadoop.com/m/lL12B1PFVhp1[Documenting Guidance on compression and codecs].
 
 * If you have long keys (compared to the values) or many columns, use a prefix encoder.
-  FAST_DIFF is recommended, as more testing is needed for Prefix Tree encoding.
+  FAST_DIFF is recommended.
 * If the values are large (and not precompressed, such as images), use a data block compressor.
 * Use GZIP for [firstterm]_cold data_, which is accessed infrequently.
   GZIP compression uses more CPU resources than Snappy or LZO, but provides a higher compression ratio.
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc
index e15b73f865..9457638346 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -842,6 +842,28 @@ For general usage instructions, pass the `-h` option.
 
 The LoadTestTool has received many updates in recent HBase releases, including support for namespaces, support for tags, cell-level ACLS and visibility labels, testing security-related features, ability to specify the number of regions per server, tests for multi-get RPC calls, and tests relating to replication.
 
+[[ops.pre-upgrade]]
+=== Pre-Upgrade validator
+The Pre-Upgrade validator tool can be used to check the cluster for known incompatibilities before upgrading from HBase 1 to HBase 2.
+To run all the checks, use the `-all` flag.
+
+[source, bash]
+----
+$ bin/hbase pre-upgrade -all
+----
+
+==== DataBlockEncoding validation
+HBase 2.0 removed `PREFIX_TREE` Data Block Encoding from column families.
+To verify that none of the column families are using incompatible Data Block Encodings in the cluster, run the following command.
+
+[source, bash]
+----
+$ bin/hbase pre-upgrade -validateDBE
+----
+
+This check validates all column families and prints out any incompatibilities.
+To change `PREFIX_TREE` encoding to a supported one, check <<upgrade2.0.prefix-tree.removed>>.
+
 [[ops.regionmgt]]
 == Region Management
 
diff --git a/src/main/asciidoc/_chapters/upgrading.adoc b/src/main/asciidoc/_chapters/upgrading.adoc
index da4430a39a..dea72abb77 100644
--- a/src/main/asciidoc/_chapters/upgrading.adoc
+++ b/src/main/asciidoc/_chapters/upgrading.adoc
@@ -419,6 +419,10 @@ This feature was removed because it as not being actively maintained. If interes
 sweet facility which improved random read latencies at the expensive of slowed writes,
 write the HBase developers list at _dev at hbase dot apache dot org_.
 
+The prefix-tree encoding needs to be removed from all tables before upgrading to HBase 2.0+.
+To do that, first you need to change the encoding from PREFIX_TREE to something else that is supported in HBase 2.0.
+After that, you have to major compact the tables that were using PREFIX_TREE encoding before.
+To check which column families are using incompatible data block encoding, you can use the <<ops.pre-upgrade,Pre-Upgrade validator>>.
 
 [[upgrade2.0.metrics]]
 .Changed metrics
-- 
2.17.0