From a9289feb0dccf6f4f500ca8cc26b755d79da298c Mon Sep 17 00:00:00 2001 From: Peter Somogyi Date: Wed, 13 Jun 2018 11:56:57 +0200 Subject: [PATCH] HBASE-20649 Validate HFiles do not have PREFIX_TREE DataBlockEncoding --- .../tool/DataBlockEncodingValidator.java | 5 +- .../hbase/tool/HFileContentValidator.java | 94 +++++++++++++++++++ .../hbase/tool/PreUpgradeValidator.java | 9 +- src/main/asciidoc/_chapters/ops_mgt.adoc | 45 +++++++++ 4 files changed, 149 insertions(+), 4 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/tool/HFileContentValidator.java diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java index e72521b122..c909725a61 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/DataBlockEncodingValidator.java @@ -76,9 +76,8 @@ public class DataBlockEncodingValidator extends AbstractHBaseTool { if (incompatibilities > 0) { LOG.warn("There are {} column families with incompatible Data Block Encodings. Do not " - + "upgrade until these encodings are converted to a supported one.", incompatibilities); - LOG.warn("Check http://hbase.apache.org/book.html#upgrade2.0.prefix-tree.removed " - + "for instructions."); + + "upgrade until these encodings are converted to a supported one. 
" + + "Check https://s.apache.org/prefixtree for instructions.", incompatibilities); } else { LOG.info("The used Data Block Encodings are compatible with HBase 2.0."); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/HFileContentValidator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/HFileContentValidator.java new file mode 100644 index 0000000000..003167f2cd --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/HFileContentValidator.java @@ -0,0 +1,94 @@ +/** + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.tool; + +import java.io.IOException; +import java.util.Collection; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseInterfaceAudience; +import org.apache.hadoop.hbase.util.AbstractHBaseTool; +import org.apache.hadoop.hbase.util.FSUtils; +import org.apache.hadoop.hbase.util.Threads; +import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker; +import org.apache.yetus.audience.InterfaceAudience; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; + +@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS) +public class HFileContentValidator extends AbstractHBaseTool { + + private static final Logger LOG = LoggerFactory.getLogger(HFileContentValidator.class); + private static final int DEFAULT_NUM_THREADS = 20; + + /** + * Check HFile contents are readable by HBase 2. + * + * @param conf used configuration + * @return number of HFiles corrupted HBase + * @throws IOException if a remote or network exception occurs + */ + private int validateHFileContent(Configuration conf) throws IOException { + Path rootDir = FSUtils.getRootDir(conf); + LOG.info("Validating HFile contents under {}", rootDir.toString()); + Collection tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(conf), rootDir); + HFileCorruptionChecker checker = + new HFileCorruptionChecker(conf, createThreadPool(conf), false); + checker.checkTables(tableDirs); + + int checkedFiles = checker.getHFilesChecked(); + LOG.debug("Checked {} HFiles under {}", checkedFiles, rootDir.toString()); + Collection corrupted = checker.getCorrupted(); + for (Path p : corrupted) { + LOG.warn("Corrupt file: {}", p.toString()); + } + + if (!corrupted.isEmpty()) { + LOG.warn("There are {} corrupted HFiles. 
Change data block encodings before upgrading. " + + "Check https://s.apache.org/prefixtree for instructions.", corrupted.size()); + } else { + LOG.info("There are no incompatible HFiles under {}.", rootDir.toString()); + } + return corrupted.size(); + } + + private ExecutorService createThreadPool(Configuration conf) { + int numThreads = conf.getInt("hfilevalidator.numthreads", DEFAULT_NUM_THREADS); + return new ScheduledThreadPoolExecutor(numThreads, + Threads.newDaemonThreadFactory("hfilevalidator")); + } + + @Override + protected void addOptions() { + } + + @Override + protected void processOptions(CommandLine cmd) { + } + + @Override + protected int doWork() throws Exception { + return (validateHFileContent(getConf()) == 0) ? EXIT_SUCCESS : EXIT_FAILURE; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java index a3c505ef60..335732df7a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java @@ -38,6 +38,7 @@ import org.slf4j.LoggerFactory; *
  * <ul>
  * <li>validate-cp: Validates Co-processors compatibility</li>
  * <li>validate-dbe: Check Data Block Encoding for column families</li>
 + * <li>validate-hfile: Check for corrupted HFiles</li>
  * </ul>
  * </p>
*/ @@ -49,6 +50,7 @@ public class PreUpgradeValidator implements Tool { public static final String TOOL_NAME = "pre-upgrade"; public static final String VALIDATE_CP_NAME = "validate-cp"; public static final String VALIDATE_DBE_NAME = "validate-dbe"; + public static final String VALIDATE_HFILE = "validate-hfile"; private Configuration configuration; @@ -67,8 +69,10 @@ public class PreUpgradeValidator implements Tool { System.out.println("Available commands:"); System.out.printf(" %-12s Validate co-processors are compatible with HBase%n", VALIDATE_CP_NAME); - System.out.printf(" %-12s Validate DataBlockEncoding are compatible on the cluster%n", + System.out.printf(" %-12s Validate DataBlockEncodings are compatible with HBase%n", VALIDATE_DBE_NAME); + System.out.printf(" %-12s Validate HFile contents are readable%n", + VALIDATE_HFILE); System.out.println("For further information, please use command -h"); } @@ -88,6 +92,9 @@ public class PreUpgradeValidator implements Tool { case VALIDATE_DBE_NAME: tool = new DataBlockEncodingValidator(); break; + case VALIDATE_HFILE: + tool = new HFileContentValidator(); + break; case "-h": printUsage(); return AbstractHBaseTool.EXIT_FAILURE; diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc index 60b749fba9..3a6f36df9e 100644 --- a/src/main/asciidoc/_chapters/ops_mgt.adoc +++ b/src/main/asciidoc/_chapters/ops_mgt.adoc @@ -892,6 +892,51 @@ $ bin/hbase pre-upgrade validate-dbe This check validates all column families and print out any incompatibilities. To change `PREFIX_TREE` encoding to supported one check <>. +==== HFile Content validation +Even though Data Block Encoding is changed from `PREFIX_TREE` you could still have HFiles that contain data encoded that way. +To verify that HFiles are readable with HBase 2 you can use _HFile content validator_. 
+ +[source, bash] +---- +$ bin/hbase pre-upgrade validate-hfile +---- + +The tool will log the corrupt HFiles and details about the root cause. +In case the problem is about PREFIX_TREE encoding you must change the encodings before upgrading to HBase 2. +To change `PREFIX_TREE` encoding to supported one check <>. + +The following log message shows an example of an HFile with `PREFIX_TREE`. + +---- +2018-06-05 16:20:46,976 WARN [hfilevalidator-pool1-t3] hbck.HFileCorruptionChecker: Found corrupt HFile hdfs://example.com:8020/hbase/data/default/t2/72ea7f7d625ee30f959897d1a3e2c350/prefix/7e6b3d73263c4851bf2b8590a9b3791e +org.apache.hadoop.hbase.io.hfile.CorruptHFileException: Problem reading HFile Trailer from file hdfs://example.com:8020/hbase/data/default/t2/72ea7f7d625ee30f959897d1a3e2c350/prefix/7e6b3d73263c4851bf2b8590a9b3791e + at org.apache.hadoop.hbase.io.hfile.HFile.openReader(HFile.java:545) + at org.apache.hadoop.hbase.io.hfile.HFile.createReader(HFile.java:611) + at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker.checkHFile(HFileCorruptionChecker.java:101) + at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker.checkColFamDir(HFileCorruptionChecker.java:185) + at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker.checkRegionDir(HFileCorruptionChecker.java:323) + at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker$RegionDirChecker.call(HFileCorruptionChecker.java:408) + at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker$RegionDirChecker.call(HFileCorruptionChecker.java:399) + at java.util.concurrent.FutureTask.run(FutureTask.java:266) + at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) + at java.util.concurrent.FutureTask.run(FutureTask.java:266) + at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180) + at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293) + at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) + at java.lang.Thread.run(Thread.java:748) +Caused by: java.io.IOException: Invalid data block encoding type in file info: PREFIX_TREE + at org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl.createFromFileInfo(HFileDataBlockEncoderImpl.java:58) + at org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.(HFileReaderImpl.java:246) + at org.apache.hadoop.hbase.io.hfile.HFile.openReader(HFile.java:538) + ... 14 more +Caused by: java.lang.IllegalArgumentException: No enum constant org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.PREFIX_TREE + at java.lang.Enum.valueOf(Enum.java:238) + at org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.valueOf(DataBlockEncoding.java:31) + at org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl.createFromFileInfo(HFileDataBlockEncoderImpl.java:56) + ... 16 more +---- + [[ops.regionmgt]] == Region Management -- 2.17.0