From 95c31b2879439420870bf29acf344667debcd534 Mon Sep 17 00:00:00 2001
From: Peter Somogyi
Date: Tue, 5 Jun 2018 17:56:48 +0200
Subject: [PATCH] HBASE-20649 Validate HFiles do not have PREFIX_TREE
DataBlockEncoding
---
.../hbase/tool/PreUpgradeValidator.java | 71 +++++++++++++++++--
src/main/asciidoc/_chapters/ops_mgt.adoc | 46 ++++++++++++
2 files changed, 110 insertions(+), 7 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
index 138af6a3f7..2a3736fa4b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/tool/PreUpgradeValidator.java
@@ -1,4 +1,4 @@
-/**
+/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -19,8 +19,13 @@
package org.apache.hadoop.hbase.tool;
import java.io.IOException;
+import java.util.Collection;
import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
@@ -30,6 +35,9 @@ import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.util.AbstractHBaseTool;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.FSUtils;
+import org.apache.hadoop.hbase.util.Threads;
+import org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -43,6 +51,7 @@ import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
*
* - all: Run all pre-upgrade validations
* - validateDBE: Check Data Block Encoding for column families
+ * - validateHFiles: Validate HFile contents are readable
*
*
*/
@@ -52,21 +61,24 @@ public class PreUpgradeValidator extends AbstractHBaseTool {
public static final String NAME = "pre-upgrade";
private static final Logger LOG = LoggerFactory.getLogger(PreUpgradeValidator.class);
private static final byte[] DATA_BLOCK_ENCODING = Bytes.toBytes("DATA_BLOCK_ENCODING");
+ private static final int DEFAULT_NUM_THREADS = 20;
private boolean validateAll;
private boolean validateDBE;
+ private boolean validateHFiles;
/**
* Check DataBlockEncodings for column families.
*
+ * @param conf used configuration
* @return DataBlockEncoding compatible with HBase 2
* @throws IOException if a remote or network exception occurs
*/
- private boolean validateDBE() throws IOException {
+ private boolean validateDBE(Configuration conf) throws IOException {
int incompatibilities = 0;
LOG.info("Validating Data Block Encodings");
- try (Connection connection = ConnectionFactory.createConnection(getConf());
+ try (Connection connection = ConnectionFactory.createConnection(conf);
Admin admin = connection.getAdmin()) {
List tableDescriptors = admin.listTableDescriptors();
String encoding = "";
@@ -89,9 +101,8 @@ public class PreUpgradeValidator extends AbstractHBaseTool {
if (incompatibilities > 0) {
LOG.warn("There are {} column families with incompatible Data Block Encodings. Do not "
- + "upgrade until these encodings are converted to a supported one.", incompatibilities);
- LOG.warn("Check http://hbase.apache.org/book.html#upgrade2.0.prefix-tree.removed "
- + "for instructions.");
+ + "upgrade until these encodings are converted to a supported one. "
+ + "Check https://s.apache.org/prefixtree for instructions.", incompatibilities);
return false;
} else {
LOG.info("The used Data Block Encodings are compatible with HBase 2.0.");
@@ -99,23 +110,69 @@ public class PreUpgradeValidator extends AbstractHBaseTool {
}
}
+  /**
+   * Check HFile contents are readable by HBase 2.
+   *
+   * <p>Every table directory under the HBase root directory is handed to an
+   * {@link HFileCorruptionChecker}; each file it reports as corrupt is logged at WARN level.
+   *
+   * @param conf used configuration
+   * @return {@code true} if the checker reported no corrupt HFiles, {@code false} otherwise
+   * @throws IOException if a remote or network exception occurs
+   */
+  private boolean validateHFileContent(Configuration conf) throws IOException {
+    Path rootDir = FSUtils.getRootDir(conf);
+    LOG.info("Validating HFile contents under {}", rootDir);
+
+    Collection<Path> tableDirs = FSUtils.getTableDirs(FSUtils.getCurrentFileSystem(conf), rootDir);
+    ExecutorService threadPool = createThreadPool(conf);
+    HFileCorruptionChecker checker;
+    try {
+      checker = new HFileCorruptionChecker(conf, threadPool, false);
+      checker.checkTables(tableDirs);
+    } finally {
+      // The pool is private to this check; shut it down even when the check throws.
+      threadPool.shutdown();
+    }
+
+    LOG.debug("Checked {} HFiles under {}", checker.getHFilesChecked(), rootDir);
+    Collection<Path> corrupted = checker.getCorrupted();
+    for (Path p : corrupted) {
+      LOG.warn("Corrupt file: {}", p);
+    }
+
+    if (corrupted.isEmpty()) {
+      LOG.info("There are no incompatible HFiles under {}.", rootDir);
+      return true;
+    }
+
+    LOG.warn("There are {} corrupted HFiles. Change data block encodings before upgrading. "
+      + "Check https://s.apache.org/prefixtree for instructions.", corrupted.size());
+    return false;
+  }
+
+  /**
+   * Build the executor used to check HFiles.
+   *
+   * @param conf configuration; "preupgrade.numthreads" is read as the pool's core size, with
+   *   {@link #DEFAULT_NUM_THREADS} passed as the default value
+   * @return a {@link ScheduledThreadPoolExecutor} using the "preupgrade" daemon thread factory
+   */
+  private ExecutorService createThreadPool(Configuration conf) {
+    int poolSize = conf.getInt("preupgrade.numthreads", DEFAULT_NUM_THREADS);
+    return new ScheduledThreadPoolExecutor(poolSize, Threads.newDaemonThreadFactory("preupgrade"));
+  }
+
@Override
protected void addOptions() {
addOptNoArg("all", "Run all pre-upgrade validations");
addOptNoArg("validateDBE", "Validate DataBlockEncoding are compatible on the cluster");
+ addOptNoArg("validateHFiles", "Validate HFile contents are compatible");
}
@Override
protected void processOptions(CommandLine cmd) {
validateAll = cmd.hasOption("all");
validateDBE = cmd.hasOption("validateDBE");
+ validateHFiles = cmd.hasOption("validateHFiles");
}
@Override
protected int doWork() throws Exception {
+ Configuration conf = getConf();
boolean validationFailed = false;
if (validateDBE || validateAll) {
- if (validateDBE()) {
+      // validateDBE returns false when incompatible encodings were found, so only a false
+      // result is a failed validation (same convention as the validateHFileContent check below).
+      if (!validateDBE(conf)) {
+        validationFailed = true;
+      }
+ }
+ if (validateHFiles || validateAll) {
+ if (!validateHFileContent(conf)) {
validationFailed = true;
}
}
diff --git a/src/main/asciidoc/_chapters/ops_mgt.adoc b/src/main/asciidoc/_chapters/ops_mgt.adoc
index 6fef714c91..cbea73cfd0 100644
--- a/src/main/asciidoc/_chapters/ops_mgt.adoc
+++ b/src/main/asciidoc/_chapters/ops_mgt.adoc
@@ -864,6 +864,52 @@ $ bin/hbase pre-upgrade -validateDBE
This check validates all column families and print out any incompatibilities.
To change `PREFIX_TREE` encoding to supported one check <>.
+==== HFile content validation
+Even after the Data Block Encoding of a column family has been changed from `PREFIX_TREE`, existing HFiles can still contain data encoded that way.
+To verify that HFiles are readable with HBase 2, use the _HFile content validator_.
+
+[source, bash]
+----
+$ bin/hbase pre-upgrade -validateHFiles
+----
+
+The tool logs the corrupt HFiles together with details about the root cause.
+If the problem is caused by `PREFIX_TREE` encoding, you must change the encodings before upgrading to HBase 2.
+To change `PREFIX_TREE` encoding to a supported one, see <<upgrade2.0.prefix-tree.removed,_prefix-tree_ encoding removed>>.
+
+
+The following log message shows an example of the warning logged for a `PREFIX_TREE` encoded HFile.
+
+----
+2018-06-05 16:20:46,976 WARN [preupgrade-pool1-t3] hbck.HFileCorruptionChecker: Found corrupt HFile hdfs://example.com:8020/hbase/data/default/t2/72ea7f7d625ee30f959897d1a3e2c350/prefix/7e6b3d73263c4851bf2b8590a9b3791e
+org.apache.hadoop.hbase.io.hfile.CorruptHFileException: Problem reading HFile Trailer from file hdfs://example.com:8020/hbase/data/default/t2/72ea7f7d625ee30f959897d1a3e2c350/prefix/7e6b3d73263c4851bf2b8590a9b3791e
+ at org.apache.hadoop.hbase.io.hfile.HFile.openReader(HFile.java:545)
+ at org.apache.hadoop.hbase.io.hfile.HFile.createReader(HFile.java:611)
+ at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker.checkHFile(HFileCorruptionChecker.java:101)
+ at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker.checkColFamDir(HFileCorruptionChecker.java:185)
+ at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker.checkRegionDir(HFileCorruptionChecker.java:323)
+ at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker$RegionDirChecker.call(HFileCorruptionChecker.java:408)
+ at org.apache.hadoop.hbase.util.hbck.HFileCorruptionChecker$RegionDirChecker.call(HFileCorruptionChecker.java:399)
+ at java.util.concurrent.FutureTask.run(FutureTask.java:266)
+ at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
+ at java.util.concurrent.FutureTask.run(FutureTask.java:266)
+ at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
+ at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
+ at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
+ at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
+ at java.lang.Thread.run(Thread.java:748)
+Caused by: java.io.IOException: Invalid data block encoding type in file info: PREFIX_TREE
+ at org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl.createFromFileInfo(HFileDataBlockEncoderImpl.java:58)
+ at org.apache.hadoop.hbase.io.hfile.HFileReaderImpl.<init>(HFileReaderImpl.java:246)
+ at org.apache.hadoop.hbase.io.hfile.HFile.openReader(HFile.java:538)
+ ... 14 more
+Caused by: java.lang.IllegalArgumentException: No enum constant org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.PREFIX_TREE
+ at java.lang.Enum.valueOf(Enum.java:238)
+ at org.apache.hadoop.hbase.io.encoding.DataBlockEncoding.valueOf(DataBlockEncoding.java:31)
+ at org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoderImpl.createFromFileInfo(HFileDataBlockEncoderImpl.java:56)
+ ... 16 more
+----
+
[[ops.regionmgt]]
== Region Management
--
2.17.0