From 95c31b2879439420870bf29acf344667debcd534 Mon Sep 17 00:00:00 2001 From: Peter Somogyi Date: Tue, 5 Jun 2018 17:56:48 +0200 Subject: [PATCH] HBASE-20649 Validate HFiles do not have PREFIX_TREE DataBlockEncoding --- .../hbase/tool/PreUpgradeValidator.java | 71 +++++++++++++++++-- src/main/asciidoc/_chapters/ops_mgt.adoc | 46 ++++++++++++ 2 files changed, 110 insertions(+), 7 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java index 138af6a3f7..2a3736fa4b 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java @@ -1,4 +1,4 @@ -/** +/* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file @@ -19,8 +19,13 @@ package org.apache.hadoop.hbase.tool; import java.io.IOException; +import java.util.Collection; import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; @@ -30,6 +35,9 @@ import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; import org.apache.hadoop.hbase.util.AbstractHBaseTool; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.Threads; +import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker; import org.apache.yetus.audience.InterfaceAudience; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,6 +51,7 @@ import 
org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; * *

*/ @@ -52,21 +61,24 @@ public class PreUpgradeValidator extends AbstractHBaseTool { public static final String NAME = "pre-upgrade"; private static final Logger LOG = LoggerFactory.getLogger(PreUpgradeValidator.class); private static final byte[] DATA_BLOCK_ENCODING = Bytes.toBytes("DATA_BLOCK_ENCODING"); + private static final int DEFAULT_NUM_THREADS = 20; private boolean validateAll; private boolean validateDBE; + private boolean validateHFiles; /** * Check DataBlockEncodings for column families. * + * @param conf used configuration * @return DataBlockEncoding compatible with HBase 2 * @throws IOException if a remote or network exception occurs */ - private boolean validateDBE() throws IOException { + private boolean validateDBE(Configuration conf) throws IOException { int incompatibilities = 0; LOG.info("Validating Data Block Encodings"); - try (Connection connection = ConnectionFactory.createConnection(getConf()); + try (Connection connection = ConnectionFactory.createConnection(conf); Admin admin = connection.getAdmin()) { List tableDescriptors = admin.listTableDescriptors(); String encoding = ""; @@ -89,9 +101,8 @@ public class PreUpgradeValidator extends AbstractHBaseTool { if (incompatibilities > 0) { LOG.warn("There are {} column families with incompatible Data Block Encodings. Do not " - + "upgrade until these encodings are converted to a supported one.", incompatibilities); - LOG.warn("Check http://hbase.apache.org/book.html#upgrade2.0.prefix-tree.removed " - + "for instructions."); + + "upgrade until these encodings are converted to a supported one. " + + "Check https://s.apache.org/prefixtree for instructions.", incompatibilities); return false; } else { LOG.info("The used Data Block Encodings are compatible with HBase 2.0."); @@ -99,23 +110,69 @@ public class PreUpgradeValidator extends AbstractHBaseTool { } } + /** + * Check HFile contents are readable by HBase 2. 
+ * + * @param conf used configuration + * @return HFiles are compatible with HBase 2 + * @throws IOException if a remote or network exception occurs + */ + private boolean validateHFileContent(Configuration conf) throws IOException { + Path rootDir = FSUtils.getRootDir(conf); + LOG.info("Validating HFile contents under {}", rootDir.toString()); + Collection tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(conf), rootDir); + HFileCorruptionChecker checker = + new HFileCorruptionChecker(conf, createThreadPool(conf), false); + checker.checkTables(tableDirs); + + int checkedFiles = checker.getHFilesChecked(); + LOG.debug("Checked {} HFiles under {}", checkedFiles, rootDir.toString()); + Collection corrupted = checker.getCorrupted(); + for (Path p : corrupted) { + LOG.warn("Corrupt file: {}", p.toString()); + } + + if (!corrupted.isEmpty()) { + LOG.warn("There are {} corrupted HFiles. Change data block encodings before upgrading. " + + "Check https://s.apache.org/prefixtree for instructions.", corrupted.size()); + return false; + } else { + LOG.info("There are no incompatible HFiles under {}.", rootDir.toString()); + return true; + } + } + + private ExecutorService createThreadPool(Configuration conf) { + int numThreads = conf.getInt("preupgrade.numthreads", DEFAULT_NUM_THREADS); + return new ScheduledThreadPoolExecutor(numThreads, + Threads.newDaemonThreadFactory("preupgrade")); + } + @Override protected void addOptions() { addOptNoArg("all", "Run all pre-upgrade validations"); addOptNoArg("validateDBE", "Validate DataBlockEncoding are compatible on the cluster"); + addOptNoArg("validateHFiles", "Validate HFile contents are compatible"); } @Override protected void processOptions(CommandLine cmd) { validateAll = cmd.hasOption("all"); validateDBE = cmd.hasOption("validateDBE"); + validateHFiles = cmd.hasOption("validateHFiles"); } @Override protected int doWork() throws Exception { + Configuration conf = getConf(); boolean validationFailed = false; if 
(validateDBE || validateAll) { - if (validateDBE()) { + if (!validateDBE(conf)) { + validationFailed = true; + } + } + if (validateHFiles || validateAll) { + if (!validateHFileContent(conf)) { validationFailed = true; } } diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc index 6fef714c91..cbea73cfd0 100644 --- a/src/main/asciidoc/_chapters/ops_mgt.adoc +++ b/src/main/asciidoc/_chapters/ops_mgt.adoc @@ -864,6 +864,52 @@ $ bin/hbase pre-upgrade -validateDBE This check validates all column families and print out any incompatibilities. To change `PREFIX_TREE` encoding to supported one check <>. +==== HFile content validation +Even though Data Block Encoding is changed from `PREFIX_TREE`, you could still have HFiles that contain data encoded that way. +To verify that HFiles are readable with HBase 2, you can use _HFile content validator_. + +[source, bash] +---- +$ bin/hbase pre-upgrade -validateHFiles +---- + +The tool will log the corrupt HFiles and details about the root cause. +In case the problem is about PREFIX_TREE encoding, you must change the encodings before upgrading to HBase 2. +To change `PREFIX_TREE` encoding to a supported one, check <>. + + +The following log message shows an example of a `PREFIX_TREE` HFile. 
+ +---- +2018-06-05 16:20:46,976 WARN [preupgrade-pool1-t3] hbck.HFileCorruptionChecker: Found corrupt HFile hdfs://example.com:8020/hbase/data/default/t2/72ea7f7d625ee30f959897d1a3e2c350/prefix/7e6b3d73263c4851bf2b8590a9b3791e +org.apache.hadoop.hbase.io.hfile.CorruptHFileException: Problem reading HFile Trailer from file hdfs://example.com:8020/hbase/data/default/t2/72ea7f7d625ee30f959897d1a3e2c350/prefix/7e6b3d73263c4851bf2b8590a9b3791e + at org.apache.hadoop.hbase.io.hfile.HFile.openReader(HFile.java:545) + at org.apache.hadoop.hbase.io.hfile.HFile.createReader(HFile.java:611) + at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker.checkHFile(HFileCorruptionChecker.java:101) + at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker.checkColFamDir(HFileCorruptionChecker.java:185) + at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker.checkRegionDir(HFileCorruptionChecker.java:323) + at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker$RegionDirChecker.call(HFileCorruptionChecker.java:408) + at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker$RegionDirChecker.call(HFileCorruptionChecker.java:399) + at java.util.concurrent.FutureTask.run(FutureTask.java:266) + at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) + at java.util.concurrent.FutureTask.run(FutureTask.java:266) + at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180) + at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) + at java.lang.Thread.run(Thread.java:748) +Caused by: java.io.IOException: Invalid data block encoding type in file info: PREFIX_TREE + at 
org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl.createFromFileInfo(HFileDataBlockEncoderImpl.java:58) + at org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.(HFileReaderImpl.java:246) + at org.apache.hadoop.hbase.io.hfile.HFile.openReader(HFile.java:538) + ... 14 more +Caused by: java.lang.IllegalArgumentException: No enum constant org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.PREFIX_TREE + at java.lang.Enum.valueOf(Enum.java:238) + at org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.valueOf(DataBlockEncoding.java:31) + at org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl.createFromFileInfo(HFileDataBlockEncoderImpl.java:56) + ... 16 more +---- + [[ops.regionmgt]] == Region Management -- 2.17.0