From 70a0ca2e962aa9585fdc6f8c90078bf08556982a Mon Sep 17 00:00:00 2001 From: Balazs Meszaros Date: Tue, 26 Jun 2018 10:47:06 +0200 Subject: [PATCH] HBASE-20649 Validate HFiles do not have PREFIX_TREE DataBlockEncoding --- .../tool/DataBlockEncodingValidator.java | 5 +- .../hbase/tool/HFileContentValidator.java | 126 ++++++++++++++++++ .../hbase/tool/PreUpgradeValidator.java | 7 + src/main/asciidoc/_chapters/ops_mgt.adoc | 45 +++++++ 4 files changed, 180 insertions(+), 3 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/tool/HFileContentValidator.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java index e72521b122..c909725a61 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java @@ -76,9 +76,8 @@ public class DataBlockEncodingValidator extends AbstractHBaseTool { if (incompatibilities > 0) { LOG.warn("There are {} column families with incompatible Data Block Encodings. Do not " - + "upgrade until these encodings are converted to a supported one.", incompatibilities); - LOG.warn("Check http://hbase.apache.org/book.html#upgrade2.0.prefix-tree.removed " - + "for instructions."); + + "upgrade until these encodings are converted to a supported one. " + + "Check https://s.apache.org/prefixtree for instructions.", incompatibilities); } else { LOG.info("The used Data Block Encodings are compatible with HBase 2.0."); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/HFileContentValidator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/HFileContentValidator.java new file mode 100644 index 0000000000..d60844bf95 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/HFileContentValidator.java @@ -0,0 +1,126 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hbase.tool;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseInterfaceAudience;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.util.AbstractHBaseTool;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
+
+@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
+public class HFileContentValidator extends AbstractHBaseTool {
+
+  private static final Logger LOG = LoggerFactory.getLogger(HFileContentValidator.class);
+
+  /**
+   * Check that HFile contents are readable by HBase 2.
+   *
+   * @param conf used configuration
+   * @return true if all HFiles are readable by HBase 2, false otherwise
+   * @throws IOException if a remote or network exception occurs
+   */
+  private boolean validateHFileContent(Configuration conf) throws IOException {
+    FileSystem fileSystem = FSUtils.getCurrentFileSystem(conf);
+
+    ExecutorService threadPool = createThreadPool(conf);
+    HFileCorruptionChecker checker;
+
+    try {
+      checker = new HFileCorruptionChecker(conf, threadPool, false);
+
+      Path rootDir = FSUtils.getRootDir(conf);
+      LOG.info("Validating HFile contents under {}", rootDir);
+
+      Collection<Path> tableDirs = FSUtils.getTableDirs(fileSystem, rootDir);
+      checker.checkTables(tableDirs);
+
+      Path archiveRootDir = new Path(rootDir, HConstants.HFILE_ARCHIVE_DIRECTORY);
+      LOG.info("Validating HFile contents under {}", archiveRootDir);
+
+      List<Path> archiveTableDirs = FSUtils.getTableDirs(fileSystem, archiveRootDir);
+      checker.checkTables(archiveTableDirs);
+    } finally {
+      threadPool.shutdown();
+
+      try {
+        threadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.SECONDS);
+      } catch (InterruptedException e) {
+        Thread.currentThread().interrupt();
+      }
+    }
+
+    int checkedFiles = checker.getHFilesChecked();
+    Collection<Path> corrupted = checker.getCorrupted();
+
+    if (corrupted.isEmpty()) {
+      LOG.info("Checked {} HFiles, none of them are corrupted.", checkedFiles);
+      LOG.info("There are no incompatible HFiles.");
+
+      return true;
+    } else {
+      LOG.info("Checked {} HFiles, {} are corrupted.", checkedFiles, corrupted.size());
+
+      for (Path path : corrupted) {
+        LOG.info("Corrupted file: {}", path);
+      }
+
+      LOG.info("Change data block encodings before upgrading. "
+          + "Check https://s.apache.org/prefixtree for instructions.");
+
+      return false;
+    }
+  }
+
+  private ExecutorService createThreadPool(Configuration conf) {
+    int availableProcessors = Runtime.getRuntime().availableProcessors();
+    int numThreads = conf.getInt("hfilevalidator.numthreads", availableProcessors);
+    return Executors.newFixedThreadPool(numThreads,
+        Threads.getNamedThreadFactory("hfile-validator"));
+  }
+
+  @Override
+  protected void addOptions() {
+  }
+
+  @Override
+  protected void processOptions(CommandLine cmd) {
+  }
+
+  @Override
+  protected int doWork() throws Exception {
+    return (validateHFileContent(getConf())) ? EXIT_SUCCESS : EXIT_FAILURE;
+  }
+}
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
index 7bf307484b..818004c272 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
@@ -38,6 +38,7 @@ import org.slf4j.LoggerFactory;
  * <ul>
  * <li>validate-cp: Validates Co-processors compatibility</li>
  * <li>validate-dbe: Check Data Block Encoding for column families</li>
+ * <li>validate-hfile: Check for corrupted HFiles</li>
  * </ul>
  * </p>
  */
@@ -49,6 +50,7 @@ public class PreUpgradeValidator implements Tool {
   public static final String TOOL_NAME = "pre-upgrade";
   public static final String VALIDATE_CP_NAME = "validate-cp";
   public static final String VALIDATE_DBE_NAME = "validate-dbe";
+  public static final String VALIDATE_HFILE = "validate-hfile";
 
   private Configuration configuration;
 
@@ -69,6 +71,8 @@ public class PreUpgradeValidator implements Tool {
         VALIDATE_CP_NAME);
     System.out.printf(" %-15s Validate DataBlockEncodings are compatible with HBase%n",
         VALIDATE_DBE_NAME);
+    System.out.printf(" %-15s Validate HFile contents are readable%n",
+        VALIDATE_HFILE);
     System.out.println("For further information, please use command -h");
   }
 
@@ -88,6 +92,9 @@ public class PreUpgradeValidator implements Tool {
       case VALIDATE_DBE_NAME:
         tool = new DataBlockEncodingValidator();
         break;
+      case VALIDATE_HFILE:
+        tool = new HFileContentValidator();
+        break;
       case "-h":
         printUsage();
         return AbstractHBaseTool.EXIT_FAILURE;
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc
index b6be8679ad..7f9e025dce 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -906,6 +906,51 @@ $ bin/hbase pre-upgrade validate-dbe
 This check validates all column families and prints out any incompatibilities.
 To change `PREFIX_TREE` encoding to a supported one, check <<upgrade2.0.prefix-tree.removed>>.
 
+==== HFile Content validation
+Even though the Data Block Encoding has been changed from `PREFIX_TREE`, there may still be HFiles that contain data encoded that way.
+To verify that HFiles are readable by HBase 2, you can use the _HFile content validator_.
+
+[source, bash]
+----
+$ bin/hbase pre-upgrade validate-hfile
+----
+
+The tool logs any corrupt HFiles together with details about the root cause.
+If the problem is caused by `PREFIX_TREE` encoding, you must change the encodings before upgrading to HBase 2.
+To change `PREFIX_TREE` encoding to a supported one, check <<upgrade2.0.prefix-tree.removed>>.
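+
+For example, one way to do this is from the HBase shell: alter the affected column family to a supported encoding such as `FAST_DIFF`, then run a major compaction so the existing HFiles are rewritten with the new encoding.
+This is a minimal sketch, not part of the tool's output; the table name `t2` and column family `prefix` are illustrative only (they match the example log that follows).
+
+----
+# switch the column family to a supported Data Block Encoding
+hbase> alter 't2', { NAME => 'prefix', DATA_BLOCK_ENCODING => 'FAST_DIFF' }
+# rewrite the existing HFiles so they no longer use PREFIX_TREE
+hbase> major_compact 't2'
+----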
+
+The following log message shows an example of an HFile with `PREFIX_TREE` encoding.
+
+----
+2018-06-05 16:20:46,976 WARN [hfilevalidator-pool1-t3] hbck.HFileCorruptionChecker: Found corrupt HFile hdfs://example.com:8020/hbase/data/default/t2/72ea7f7d625ee30f959897d1a3e2c350/prefix/7e6b3d73263c4851bf2b8590a9b3791e
+org.apache.hadoop.hbase.io.hfile.CorruptHFileException: Problem reading HFile Trailer from file hdfs://example.com:8020/hbase/data/default/t2/72ea7f7d625ee30f959897d1a3e2c350/prefix/7e6b3d73263c4851bf2b8590a9b3791e
+  at org.apache.hadoop.hbase.io.hfile.HFile.openReader(HFile.java:545)
+  at org.apache.hadoop.hbase.io.hfile.HFile.createReader(HFile.java:611)
+  at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker.checkHFile(HFileCorruptionChecker.java:101)
+  at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker.checkColFamDir(HFileCorruptionChecker.java:185)
+  at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker.checkRegionDir(HFileCorruptionChecker.java:323)
+  at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker$RegionDirChecker.call(HFileCorruptionChecker.java:408)
+  at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker$RegionDirChecker.call(HFileCorruptionChecker.java:399)
+  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
+  at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
+  at java.util.concurrent.FutureTask.run(FutureTask.java:266)
+  at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
+  at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
+  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
+  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
+  at java.lang.Thread.run(Thread.java:748)
+Caused by: java.io.IOException: Invalid data block encoding type in file info: PREFIX_TREE
+  at org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl.createFromFileInfo(HFileDataBlockEncoderImpl.java:58)
+  at org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.<init>(HFileReaderImpl.java:246)
+  at org.apache.hadoop.hbase.io.hfile.HFile.openReader(HFile.java:538)
+  ... 14 more
+Caused by: java.lang.IllegalArgumentException: No enum constant org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.PREFIX_TREE
+  at java.lang.Enum.valueOf(Enum.java:238)
+  at org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.valueOf(DataBlockEncoding.java:31)
+  at org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl.createFromFileInfo(HFileDataBlockEncoderImpl.java:56)
+  ... 16 more
+----
+
 [[ops.regionmgt]]
 == Region Management
-- 
2.17.0