From 2149c97045ccdcd782605151c045a3917b03c936 Mon Sep 17 00:00:00 2001 From: Sean Busbey Date: Mon, 9 Apr 2018 13:37:44 -0500 Subject: [PATCH 1/4] HBASE-20332 shaded mapreduce module shouldn't include hadoop * modify the jar checking script to take args; make hadoop stuff optional * separate out checking the artifacts that have hadoop vs those that don't. * * Unfortunately means we need two modules for checking things * * put in a safety check that the support script for checking jar contents is maintained in both modules * * have to carve out an exception for o.a.hadoop.metrics2. :( * fix duplicated class warning * clean up dependencies in hbase-server and some modules that depend on it. * allow Hadoop to have its own htrace where it needs it * add a precommit check to make sure we're not using old htrace imports --- dev-support/hbase-personality.sh | 12 ++ hbase-backup/pom.xml | 12 +- .../src/main/resources/hbase/checkstyle.xml | 3 +- hbase-client/pom.xml | 4 - hbase-common/pom.xml | 12 -- .../java/org/apache/hadoop/hbase/net/Address.java | 2 +- hbase-endpoint/pom.xml | 14 -- hbase-examples/pom.xml | 12 -- hbase-external-blockcache/pom.xml | 4 - hbase-hadoop2-compat/pom.xml | 6 - hbase-it/pom.xml | 6 - hbase-mapreduce/pom.xml | 30 +-- hbase-replication/pom.xml | 4 - hbase-rest/pom.xml | 19 +- hbase-rsgroup/pom.xml | 4 - hbase-server/pom.xml | 103 +++++----- hbase-shaded/hbase-shaded-check-invariants/pom.xml | 54 ++++-- .../resources/ensure-jars-have-correct-contents.sh | 92 +++++++-- hbase-shaded/hbase-shaded-mapreduce/pom.xml | 190 +++++++++++++++++- .../pom.xml | 215 +++++++++++++++++++++ .../resources/ensure-jars-have-correct-contents.sh | 129 +++++++++++++ hbase-shaded/pom.xml | 13 ++ hbase-shell/pom.xml | 14 -- hbase-testing-util/pom.xml | 16 -- hbase-thrift/pom.xml | 16 -- pom.xml | 65 +++---- 26 files changed, 785 insertions(+), 266 deletions(-) create mode 100644 hbase-shaded/hbase-shaded-with-hadoop-check-invariants/pom.xml create mode 100644 hbase-shaded/hbase-shaded-with-hadoop-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh diff --git a/dev-support/hbase-personality.sh b/dev-support/hbase-personality.sh index 2b1e2c34f9..77bbc2231c 100755 --- a/dev-support/hbase-personality.sh +++ b/dev-support/hbase-personality.sh @@ -649,6 +649,18 @@ function hbaseanti_patchfile ((result=result+1)) fi + warnings=$(${GREP} -c -E 'import org.apache.htrace.[^c]' "${patchfile}") + if [[ ${warnings} -gt 0 ]]; then + add_vote_table -1 hbaseanti "" "The patch appears to use HTrace 3 classes instead of HTrace 4."
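+    # HTrace 4 classes live under org.apache.htrace.core, so the [^c] in the pattern above lets HTrace 4 imports pass while still flagging HTrace 3 ones.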
+ echo "Use of HTrace 3 in the patch" + { + printf 'Use of HTrace 3 in patchfile\n------\n' + ${GREP} -n -E 'import org.apache.htrace.[^c]' "${patchfile}" + echo "------" + } >>"${PATCH_DIR}/${logfile}" + ((result=result+1)) + fi + if [[ ${result} -gt 0 ]]; then return 1 fi diff --git a/hbase-backup/pom.xml b/hbase-backup/pom.xml index 7afd51eaf0..00a996f51e 100644 --- a/hbase-backup/pom.xml +++ b/hbase-backup/pom.xml @@ -154,10 +154,6 @@ org.apache.hadoop hadoop-common - - org.apache.htrace - htrace-core - net.java.dev.jets3t jets3t @@ -264,9 +260,6 @@ 3.0 - - 3.0-SNAPSHOT - org.apache.hadoop @@ -276,6 +269,11 @@ org.apache.hadoop hadoop-mapreduce-client-core + + org.apache.hadoop + hadoop-distcp + ${hadoop.version} + diff --git a/hbase-checkstyle/src/main/resources/hbase/checkstyle.xml b/hbase-checkstyle/src/main/resources/hbase/checkstyle.xml index c77d46b93f..601b302801 100644 --- a/hbase-checkstyle/src/main/resources/hbase/checkstyle.xml +++ b/hbase-checkstyle/src/main/resources/hbase/checkstyle.xml @@ -85,7 +85,8 @@ org.apache.commons.collections4, org.apache.commons.lang, org.apache.curator.shaded, - org.apache.htrace.shaded"/> + org.apache.htrace.shaded, + org.htrace"/> diff --git a/hbase-client/pom.xml b/hbase-client/pom.xml index f6247e30e3..bb99eec4ea 100644 --- a/hbase-client/pom.xml +++ b/hbase-client/pom.xml @@ -234,10 +234,6 @@ org.apache.hadoop hadoop-common - - org.apache.htrace - htrace-core - net.java.dev.jets3t jets3t diff --git a/hbase-common/pom.xml b/hbase-common/pom.xml index 5ae8e0b637..0aaccb89e3 100644 --- a/hbase-common/pom.xml +++ b/hbase-common/pom.xml @@ -314,12 +314,6 @@ hadoop-common - - - org.apache.htrace - htrace-core - - @@ -366,12 +360,6 @@ org.apache.hadoop hadoop-common - - - org.apache.htrace - htrace-core - - diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java index ab7fa3bcd4..b7931a451c 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/net/Address.java @@ -26,7 +26,7 @@ import org.apache.hbase.thirdparty.com.google.common.net.HostAndPort; * An immutable type to hold a hostname and port combo, like an Endpoint * or java.net.InetSocketAddress (but without danger of our calling * resolve -- we do NOT want a resolve happening every time we want - * to hold a hostname and port combo). This class is also <>. + * to hold a hostname and port combo). This class is also @{code Comparable}. *
<p>
In implementation this class is a facade over Guava's {@link HostAndPort}. * We cannot have Guava classes in our API hence this Type. */ diff --git a/hbase-endpoint/pom.xml b/hbase-endpoint/pom.xml index e9a8cf7132..a831d3a398 100644 --- a/hbase-endpoint/pom.xml +++ b/hbase-endpoint/pom.xml @@ -260,12 +260,6 @@ org.apache.hadoop hadoop-common - - - org.apache.htrace - htrace-core - - org.apache.hadoop @@ -296,10 +290,6 @@ hadoop-minicluster test - - org.apache.htrace - htrace-core - com.google.guava guava @@ -343,10 +333,6 @@ org.apache.hadoop hadoop-minicluster - - org.apache.htrace - htrace-core - com.google.guava guava diff --git a/hbase-examples/pom.xml b/hbase-examples/pom.xml index 8814491373..b7847ef067 100644 --- a/hbase-examples/pom.xml +++ b/hbase-examples/pom.xml @@ -228,12 +228,6 @@ org.apache.hadoop hadoop-common - - - org.apache.htrace - htrace-core - - @@ -283,12 +277,6 @@ org.apache.hadoop hadoop-minicluster - - - org.apache.htrace - htrace-core - - diff --git a/hbase-external-blockcache/pom.xml b/hbase-external-blockcache/pom.xml index 24c33ebe48..2479b46039 100644 --- a/hbase-external-blockcache/pom.xml +++ b/hbase-external-blockcache/pom.xml @@ -224,10 +224,6 @@ org.apache.hadoop hadoop-common - - org.apache.htrace - htrace-core - com.google.guava guava diff --git a/hbase-hadoop2-compat/pom.xml b/hbase-hadoop2-compat/pom.xml index 6a313ab4f5..5174f26dc6 100644 --- a/hbase-hadoop2-compat/pom.xml +++ b/hbase-hadoop2-compat/pom.xml @@ -160,12 +160,6 @@ limitations under the License. org.apache.hadoop hadoop-common ${hadoop.version} - - - org.apache.htrace - htrace-core - - org.apache.commons diff --git a/hbase-it/pom.xml b/hbase-it/pom.xml index 9b1a167155..4c17245a9e 100644 --- a/hbase-it/pom.xml +++ b/hbase-it/pom.xml @@ -335,12 +335,6 @@ org.apache.hadoop hadoop-common - - - org.apache.htrace - htrace-core - - org.apache.hadoop diff --git a/hbase-mapreduce/pom.xml b/hbase-mapreduce/pom.xml index af80737644..2bf693bc3d 100644 --- a/hbase-mapreduce/pom.xml +++ b/hbase-mapreduce/pom.xml @@ -196,6 +196,15 @@ org.apache.hbase hbase-server + + + + commons-logging + commons-logging + + org.apache.hbase @@ -246,10 +255,19 @@ junit test + com.fasterxml.jackson.core jackson-databind + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-annotations + @@ -284,10 +302,6 @@ org.apache.hadoop hadoop-common - - org.apache.htrace - htrace-core - net.java.dev.jets3t jets3t @@ -334,10 +348,6 @@ org.apache.hadoop hadoop-hdfs - - org.apache.htrace - htrace-core - javax.servlet.jsp jsp-api @@ -377,10 +387,6 @@ hadoop-minicluster test - - org.apache.htrace - htrace-core - org.apache.zookeeper zookeeper diff --git a/hbase-replication/pom.xml b/hbase-replication/pom.xml index d05c60ef64..b999c1d6cc 100644 --- a/hbase-replication/pom.xml +++ b/hbase-replication/pom.xml @@ -155,10 +155,6 @@ org.apache.hadoop hadoop-common - - org.apache.htrace - htrace-core - net.java.dev.jets3t jets3t diff --git a/hbase-rest/pom.xml b/hbase-rest/pom.xml index 617f254a2f..d06feec283 100644 --- a/hbase-rest/pom.xml +++ b/hbase-rest/pom.xml @@ -299,6 +299,19 @@ com.fasterxml.jackson.jaxrs jackson-jaxrs-json-provider + + + org.codehaus.jettison + jettison + + + stax + stax-api + + + org.glassfish.web @@ -374,12 +387,6 @@ org.apache.hadoop hadoop-common - - - org.apache.htrace - htrace-core - - org.apache.hadoop diff --git a/hbase-rsgroup/pom.xml b/hbase-rsgroup/pom.xml index 2d9a10d16c..1cc38549b9 100644 --- a/hbase-rsgroup/pom.xml +++ b/hbase-rsgroup/pom.xml @@ -198,10 +198,6 @@ 
org.apache.hadoop hadoop-common - - org.apache.htrace - htrace-core - net.java.dev.jets3t jets3t diff --git a/hbase-server/pom.xml b/hbase-server/pom.xml index 11361d886b..e9daf93aae 100644 --- a/hbase-server/pom.xml +++ b/hbase-server/pom.xml @@ -71,6 +71,14 @@ org.apache.maven.plugins maven-remote-resources-plugin 1.5 + + + + org.apache.hbase + hbase-resource-bundle + ${project.version} + + default @@ -392,12 +400,6 @@ org.apache.hbase hbase-metrics - - - org.apache.hbase - hbase-resource-bundle - true - commons-codec commons-codec @@ -437,19 +439,16 @@ jetty-webapp - + org.glassfish.web javax.servlet.jsp + - org.codehaus.jettison - jettison - - - stax - stax-api - - + javax.servlet.jsp + javax.servlet.jsp-api @@ -501,9 +500,20 @@ javax.servlet javax.servlet-api + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.core + jackson-core + - javax.ws.rs - javax.ws.rs-api + com.fasterxml.jackson.core + jackson-annotations @@ -511,11 +521,6 @@ org.apache.htrace htrace-core4 - - org.apache.htrace - htrace-core - ${htrace-hadoop.version} - com.lmax disruptor @@ -556,6 +561,15 @@ httpcore test + + + commons-logging + commons-logging + compile + org.apache.commons commons-crypto @@ -674,34 +688,10 @@ - - org.apache.hadoop - hadoop-distcp - ${hadoop-two.version} - org.apache.hadoop hadoop-common - - org.apache.hadoop - hadoop-auth - - - org.apache.hadoop - hadoop-annotations - ${hadoop-two.version} - - - org.apache.hadoop - hadoop-client - - - com.google.guava - guava - - - org.apache.hadoop hadoop-mapreduce-client-core @@ -796,21 +786,32 @@ org.apache.hadoop - hadoop-distcp - ${hadoop-three.version} + hadoop-common org.apache.hadoop - hadoop-common + hadoop-hdfs + + + org.apache.hadoop + hadoop-hdfs-client + ${hadoop.version} + + + org.apache.hadoop + hadoop-mapreduce-client-core org.apache.hadoop hadoop-hdfs + test-jar + test org.apache.hadoop - hadoop-annotations - ${hadoop-three.version} + hadoop-mapreduce-client-jobclient + test-jar + test org.apache.hadoop diff --git a/hbase-shaded/hbase-shaded-check-invariants/pom.xml b/hbase-shaded/hbase-shaded-check-invariants/pom.xml index 7322769f0b..7ba4a41782 100644 --- a/hbase-shaded/hbase-shaded-check-invariants/pom.xml +++ b/hbase-shaded/hbase-shaded-check-invariants/pom.xml @@ -26,7 +26,7 @@ Enforces our invariants for our shaded artifacts. e.g. shaded clients have a specific set of transitive dependencies and shaded clients only contain classes that are in particular packages. Does the enforcement through - the maven-enforcer-plugin and and integration test. + the maven-enforcer-plugin and integration test. 
Apache HBase Shaded Packaging Invariants @@ -34,11 +34,15 @@ - - org.apache.hbase - hbase-shaded-client - ${project.version} - + org.apache.hbase hbase-shaded-mapreduce @@ -113,6 +117,8 @@ com.github.stephenc.findbugs:* org.apache.htrace:* + + org.apache.hadoop:* @@ -158,18 +164,37 @@ - org.codehaus.mojo exec-maven-plugin 1.6.0 + + + make-sure-validation-files-are-in-sync + validate + + exec + + + diff + false + + ../hbase-shaded-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh + ../hbase-shaded-with-hadoop-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh + + + + check-jar-contents integration-test @@ -180,6 +205,9 @@ ${shell-executable} ${project.build.testOutputDirectory} false + ensure-jars-have-correct-contents.sh ${hbase-client-artifacts} diff --git a/hbase-shaded/hbase-shaded-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh b/hbase-shaded/hbase-shaded-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh index 8bda8ce953..eff1d20302 100644 --- a/hbase-shaded/hbase-shaded-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh +++ b/hbase-shaded/hbase-shaded-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh @@ -15,33 +15,67 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Usage: $0 [/path/to/some/example.jar:/path/to/another/example/created.jar] -# -# accepts a single command line argument with a colon separated list of -# paths to jars to check. Iterates through each such passed jar and checks -# all the contained paths to make sure they follow the below constructed -# safe list. +set -e +function usage { + echo "Usage: ${0} [options] [/path/to/some/example.jar:/path/to/another/example/created.jar]" + echo "" + echo " accepts a single command line argument with a colon separated list of" + echo " paths to jars to check. Iterates through each such passed jar and checks" + echo " all the contained paths to make sure they follow the below constructed" + echo " safe list." + echo "" + echo " --allow-hadoop Include stuff from the Apache Hadoop project in the list" + echo " of allowed jar contents. default: false" + echo " --debug print more info to stderr" + exit 1 +} +# if no args specified, show usage +if [ $# -lt 1 ]; then + usage +fi + +# Get arguments +declare allow_hadoop +declare debug +while [ $# -gt 0 ] +do + case "$1" in + --allow-hadoop) shift; allow_hadoop="true";; + --debug) shift; debug="true";; + --) shift; break;; + -*) usage ;; + *) break;; # terminate while loop + esac +done + +# should still have jars to check. 
+if [ $# -lt 1 ]; then + usage +fi +if [ -n "${debug}" ]; then + echo "[DEBUG] Checking on jars: $*" >&2 + echo "jar command is: $(which jar)" >&2 + echo "grep command is: $(which grep)" >&2 + grep -V >&2 || true +fi + +IFS=: read -r -d '' -a artifact_list < <(printf '%s\0' "$1") -# we have to allow the directories that lead to the org/apache/hadoop dir -allowed_expr="(^org/$|^org/apache/$" +# we have to allow the directories that lead to the hbase dirs +allowed_expr="(^org/$|^org/apache/$|^org/apache/hadoop/$" # We allow the following things to exist in our client artifacts: -# * classes in packages that start with org.apache.hadoop, which by -# convention should be in a path that looks like org/apache/hadoop -allowed_expr+="|^org/apache/hadoop/" +# * classes in packages that start with org.apache.hadoop.hbase, which by +# convention should be in a path that looks like org/apache/hadoop/hbase +allowed_expr+="|^org/apache/hadoop/hbase" # * classes in packages that start with org.apache.hbase allowed_expr+="|^org/apache/hbase/" # * whatever in the "META-INF" directory allowed_expr+="|^META-INF/" # * the folding tables from jcodings allowed_expr+="|^tables/" -# * Hadoop's and HBase's default configuration files, which have the form +# * HBase's default configuration files, which have the form # "_module_-default.xml" -allowed_expr+="|^[^-]*-default.xml$" -# * Hadoop's versioning properties files, which have the form -# "_module_-version-info.properties" -allowed_expr+="|^[^-]*-version-info.properties$" -# * Hadoop's application classloader properties file. -allowed_expr+="|^org.apache.hadoop.application-classloader.properties$" +allowed_expr+="|^hbase-default.xml$" # public suffix list used by httpcomponents allowed_expr+="|^mozilla/$" allowed_expr+="|^mozilla/public-suffix-list.txt$" @@ -51,12 +85,30 @@ allowed_expr+="|^properties.dtd$" allowed_expr+="|^PropertyList-1.0.dtd$" +if [ -n "${allow_hadoop}" ]; then + # * classes in packages that start with org.apache.hadoop, which by + # convention should be in a path that looks like org/apache/hadoop + allowed_expr+="|^org/apache/hadoop/" + # * Hadoop's default configuration files, which have the form + # "_module_-default.xml" + allowed_expr+="|^[^-]*-default.xml$" + # * Hadoop's versioning properties files, which have the form + # "_module_-version-info.properties" + allowed_expr+="|^[^-]*-version-info.properties$" + # * Hadoop's application classloader properties file. + allowed_expr+="|^org.apache.hadoop.application-classloader.properties$" +else + # We have some classes for integrating with the Hadoop Metrics2 system + # that have to be in a particular package space due to access rules. 
+ allowed_expr+="|^org/apache/hadoop/metrics2" +fi + + allowed_expr+=")" declare -i bad_artifacts=0 declare -a bad_contents -IFS=: read -r -d '' -a artifact_list < <(printf '%s\0' "$1") for artifact in "${artifact_list[@]}"; do - bad_contents=($(jar tf "${artifact}" | grep -v -E "${allowed_expr}")) + bad_contents=($(jar tf "${artifact}" | grep -v -E "${allowed_expr}" || true)) if [ ${#bad_contents[@]} -gt 0 ]; then echo "[ERROR] Found artifact with unexpected contents: '${artifact}'" echo " Please check the following and either correct the build or update" diff --git a/hbase-shaded/hbase-shaded-mapreduce/pom.xml b/hbase-shaded/hbase-shaded-mapreduce/pom.xml index cfcc357877..edc80cd151 100644 --- a/hbase-shaded/hbase-shaded-mapreduce/pom.xml +++ b/hbase-shaded/hbase-shaded-mapreduce/pom.xml @@ -62,6 +62,10 @@ + org.apache.hbase hbase-mapreduce @@ -137,10 +141,6 @@ org.eclipse.jetty jetty-webapp - - org.glassfish.web - javax.servlet.jsp - org.glassfish.jersey.core jersey-server @@ -149,6 +149,17 @@ org.glassfish.jersey.containers jersey-container-servlet-core + + + org.glassfish.web + javax.servlet.jsp + + + javax.servlet.jsp + javax.servlet.jsp-api + @@ -158,12 +169,175 @@ release - - org.apache.maven.plugins - maven-shade-plugin - + + + org.apache.maven.plugins + maven-shade-plugin + + + aggregate-into-a-jar-with-relocated-third-parties + + + + org.apache.hadoop:* + + org.apache.hbase:hbase-resource-bundle + org.slf4j:* + com.google.code.findbugs:* + com.github.stephenc.findbugs:* + org.apache.htrace:* + org.apache.yetus:* + log4j:* + commons-logging:* + + + + + + + + + + hadoop-2.0 + + + + !hadoop.profile + + + + + org.apache.hadoop + hadoop-common + provided + + + net.java.dev.jets3t + jets3t + + + javax.servlet.jsp + jsp-api + + + org.mortbay.jetty + jetty + + + com.sun.jersey + jersey-server + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + javax.servlet + servlet-api + + + tomcat + jasper-compiler + + + tomcat + jasper-runtime + + + com.google.code.findbugs + jsr305 + + + + + org.apache.hadoop + hadoop-hdfs + provided + + + javax.servlet.jsp + jsp-api + + + javax.servlet + servlet-api + + + io.netty + netty + + + stax + stax-api + + + xerces + xercesImpl + + + ${hadoop-two.version} + + + org.apache.hadoop + hadoop-mapreduce-client-core + provided + + + com.google.guava + guava + + + + + org.apache.hadoop + hadoop-auth + provided + + + + + + + hadoop-3.0 + + + hadoop.profile + 3.0 + + + + ${hadoop-three.version} + + + + org.apache.hadoop + hadoop-common + provided + + + org.apache.hadoop + hadoop-hdfs + provided + + + org.apache.hadoop + hadoop-auth + provided + + + diff --git a/hbase-shaded/hbase-shaded-with-hadoop-check-invariants/pom.xml b/hbase-shaded/hbase-shaded-with-hadoop-check-invariants/pom.xml new file mode 100644 index 0000000000..07789f4712 --- /dev/null +++ b/hbase-shaded/hbase-shaded-with-hadoop-check-invariants/pom.xml @@ -0,0 +1,215 @@ + + + + 4.0.0 + + hbase + org.apache.hbase + 3.0.0-SNAPSHOT + ../.. + + hbase-shaded-with-hadoop-check-invariants + pom + + + Enforces our invariants for our shaded artifacts. e.g. shaded clients have + a specific set of transitive dependencies and shaded clients only contain + classes that are in particular packages. Does the enforcement through + the maven-enforcer-plugin and integration test. 
+ + Apache HBase Shaded Packaging Invariants (with Hadoop bundled) + + + + + + + + org.apache.hbase + hbase-shaded-client + ${project.version} + + + + com.github.stephenc.findbugs + findbugs-annotations + provided + + + log4j + log4j + provided + + + + junit + junit + provided + + + org.mockito + mockito-core + provided + + + + + + org.apache.maven.plugins + maven-site-plugin + + true + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + org.codehaus.mojo + extra-enforcer-rules + 1.0-beta-6 + + + + + enforce-banned-dependencies + + enforce + + + true + + + + + + org.slf4j:* + log4j:* + commons-logging:* + + com.google.code.findbugs:* + com.github.stephenc.findbugs:* + + org.apache.htrace:* + + + + + true + + + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + test-resources + pre-integration-test + + testResources + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + put-client-artifacts-in-a-property + pre-integration-test + + build-classpath + + + provided + true + hbase-client-artifacts + + + + + + org.codehaus.mojo + exec-maven-plugin + 1.6.0 + + + + make-sure-validation-files-are-in-sync + validate + + exec + + + diff + false + + ../hbase-shaded-with-hadoop-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh + ../hbase-shaded-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh + + + + + + check-jar-contents-for-stuff-with-hadoop + integration-test + + exec + + + ${shell-executable} + ${project.build.testOutputDirectory} + false + + ensure-jars-have-correct-contents.sh + --allow-hadoop + ${hbase-client-artifacts} + + + + + + + + + diff --git a/hbase-shaded/hbase-shaded-with-hadoop-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh b/hbase-shaded/hbase-shaded-with-hadoop-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh new file mode 100644 index 0000000000..eff1d20302 --- /dev/null +++ b/hbase-shaded/hbase-shaded-with-hadoop-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -e +function usage { + echo "Usage: ${0} [options] [/path/to/some/example.jar:/path/to/another/example/created.jar]" + echo "" + echo " accepts a single command line argument with a colon separated list of" + echo " paths to jars to check. Iterates through each such passed jar and checks" + echo " all the contained paths to make sure they follow the below constructed" + echo " safe list." + echo "" + echo " --allow-hadoop Include stuff from the Apache Hadoop project in the list" + echo " of allowed jar contents. 
default: false" + echo " --debug print more info to stderr" + exit 1 +} +# if no args specified, show usage +if [ $# -lt 1 ]; then + usage +fi + +# Get arguments +declare allow_hadoop +declare debug +while [ $# -gt 0 ] +do + case "$1" in + --allow-hadoop) shift; allow_hadoop="true";; + --debug) shift; debug="true";; + --) shift; break;; + -*) usage ;; + *) break;; # terminate while loop + esac +done + +# should still have jars to check. +if [ $# -lt 1 ]; then + usage +fi +if [ -n "${debug}" ]; then + echo "[DEBUG] Checking on jars: $*" >&2 + echo "jar command is: $(which jar)" >&2 + echo "grep command is: $(which grep)" >&2 + grep -V >&2 || true +fi + +IFS=: read -r -d '' -a artifact_list < <(printf '%s\0' "$1") + +# we have to allow the directories that lead to the hbase dirs +allowed_expr="(^org/$|^org/apache/$|^org/apache/hadoop/$" +# We allow the following things to exist in our client artifacts: +# * classes in packages that start with org.apache.hadoop.hbase, which by +# convention should be in a path that looks like org/apache/hadoop/hbase +allowed_expr+="|^org/apache/hadoop/hbase" +# * classes in packages that start with org.apache.hbase +allowed_expr+="|^org/apache/hbase/" +# * whatever in the "META-INF" directory +allowed_expr+="|^META-INF/" +# * the folding tables from jcodings +allowed_expr+="|^tables/" +# * HBase's default configuration files, which have the form +# "_module_-default.xml" +allowed_expr+="|^hbase-default.xml$" +# public suffix list used by httpcomponents +allowed_expr+="|^mozilla/$" +allowed_expr+="|^mozilla/public-suffix-list.txt$" +# Comes from commons-configuration, not sure if relocatable. +allowed_expr+="|^digesterRules.xml$" +allowed_expr+="|^properties.dtd$" +allowed_expr+="|^PropertyList-1.0.dtd$" + + +if [ -n "${allow_hadoop}" ]; then + # * classes in packages that start with org.apache.hadoop, which by + # convention should be in a path that looks like org/apache/hadoop + allowed_expr+="|^org/apache/hadoop/" + # * Hadoop's default configuration files, which have the form + # "_module_-default.xml" + allowed_expr+="|^[^-]*-default.xml$" + # * Hadoop's versioning properties files, which have the form + # "_module_-version-info.properties" + allowed_expr+="|^[^-]*-version-info.properties$" + # * Hadoop's application classloader properties file. + allowed_expr+="|^org.apache.hadoop.application-classloader.properties$" +else + # We have some classes for integrating with the Hadoop Metrics2 system + # that have to be in a particular package space due to access rules. + allowed_expr+="|^org/apache/hadoop/metrics2" +fi + + +allowed_expr+=")" +declare -i bad_artifacts=0 +declare -a bad_contents +for artifact in "${artifact_list[@]}"; do + bad_contents=($(jar tf "${artifact}" | grep -v -E "${allowed_expr}" || true)) + if [ ${#bad_contents[@]} -gt 0 ]; then + echo "[ERROR] Found artifact with unexpected contents: '${artifact}'" + echo " Please check the following and either correct the build or update" + echo " the allowed list with reasoning." 
+ echo "" + for bad_line in "${bad_contents[@]}"; do + echo " ${bad_line}" + done + bad_artifacts=${bad_artifacts}+1 + else + echo "[INFO] Artifact looks correct: '$(basename "${artifact}")'" + fi +done + +# if there was atleast one bad artifact, exit with failure +if [ "${bad_artifacts}" -gt 0 ]; then + exit 1 +fi diff --git a/hbase-shaded/pom.xml b/hbase-shaded/pom.xml index 24c515844e..93b122fe08 100644 --- a/hbase-shaded/pom.xml +++ b/hbase-shaded/pom.xml @@ -42,6 +42,7 @@ hbase-shaded-client hbase-shaded-mapreduce hbase-shaded-check-invariants + hbase-shaded-with-hadoop-check-invariants @@ -118,6 +119,7 @@ maven-shade-plugin + aggregate-into-a-jar-with-relocated-third-parties package shade @@ -449,12 +451,23 @@ META-INF/ECLIPSEF.RSA + + + commons-beanutils:commons-beanutils-core + + org/apache/commons/collections/*.class + + org.apache.hbase:hbase-server hbase-webapps/* hbase-webapps/**/* + **/*_jsp.class diff --git a/hbase-shell/pom.xml b/hbase-shell/pom.xml index 8eaefaab83..85f0415605 100644 --- a/hbase-shell/pom.xml +++ b/hbase-shell/pom.xml @@ -277,12 +277,6 @@ org.apache.hadoop hadoop-common - - - org.apache.htrace - htrace-core - - org.apache.hadoop @@ -341,10 +335,6 @@ hadoop-minicluster test - - org.apache.htrace - htrace-core - com.google.guava guava @@ -408,10 +398,6 @@ org.apache.hadoop hadoop-minicluster - - org.apache.htrace - htrace-core - com.google.guava guava diff --git a/hbase-testing-util/pom.xml b/hbase-testing-util/pom.xml index 0f1b86cf83..032de61e3d 100644 --- a/hbase-testing-util/pom.xml +++ b/hbase-testing-util/pom.xml @@ -140,12 +140,6 @@ org.apache.hadoop hadoop-common compile - - - org.apache.htrace - htrace-core - - org.apache.hadoop @@ -201,10 +195,6 @@ hadoop-minicluster compile - - org.apache.htrace - htrace-core - com.google.guava guava @@ -242,12 +232,6 @@ org.apache.hadoop hadoop-minicluster compile - - - org.apache.htrace - htrace-core - - org.apache.hadoop diff --git a/hbase-thrift/pom.xml b/hbase-thrift/pom.xml index 0142ccdb59..aec3cb4668 100644 --- a/hbase-thrift/pom.xml +++ b/hbase-thrift/pom.xml @@ -498,22 +498,12 @@ org.apache.hadoop hadoop-common - - - org.apache.htrace - htrace-core - - org.apache.hadoop hadoop-minicluster test - - org.apache.htrace - htrace-core - com.google.guava guava @@ -571,12 +561,6 @@ org.apache.hadoop hadoop-minicluster - - - org.apache.htrace - htrace-core - - diff --git a/pom.xml b/pom.xml index c5d814820f..46dc8d3dcd 100755 --- a/pom.xml +++ b/pom.xml @@ -1442,8 +1442,11 @@ 11.0.2 hbase-hadoop2-compat src/main/assembly/hadoop-two-compat.xml - 0.5.0 + + 3.6.2.Final + 0.5.0 1.7.7 1.10 @@ -1471,7 +1474,6 @@ 4.12 1.3 4.2.0-incubating - 3.2.0-incubating 1.2.17 2.1.0 @@ -1599,7 +1601,8 @@ org.mortbay.jetty:servlet-api, javax.servlet:servlet-api: These are excluded because they are the same implementations. I chose org.mortbay.jetty:servlet-api-2.5 instead, which is a third implementation of the same, because Hadoop also uses this version - javax.servlet:jsp-api in favour of org.mortbay.jetty:jsp-api-2.1 + javax.servlet:jsp-api in favour of javax.servlet.jsp:javax.servlet.jsp-api:2.3.1 since it + is what glassfish's jspC jar uses and that's where we get our own need for a jsp-api. 
--> @@ -1914,6 +1917,14 @@ commons-math3 ${commons-math.version} + + + commons-logging + commons-logging + 1.2 + org.apache.zookeeper zookeeper @@ -1977,6 +1988,16 @@ jackson-jaxrs-json-provider ${jackson.version} + + com.fasterxml.jackson.core + jackson-annotations + ${jackson.version} + + + com.fasterxml.jackson.core + jackson-core + ${jackson.version} + com.fasterxml.jackson.core jackson-databind @@ -2080,6 +2101,12 @@ javax.servlet.jsp ${glassfish.jsp.version} + + + javax.servlet.jsp + javax.servlet.jsp-api + 2.3.1 + org.glassfish javax.el @@ -2544,10 +2571,6 @@ hadoop-hdfs ${hadoop-two.version} - - org.apache.htrace - htrace-core - javax.servlet.jsp jsp-api @@ -2589,10 +2612,6 @@ test-jar test - - org.apache.htrace - htrace-core - javax.servlet.jsp jsp-api @@ -2637,10 +2656,6 @@ hadoop-common ${hadoop-two.version} - - org.apache.htrace - htrace-core - commons-beanutils commons-beanutils @@ -2691,10 +2706,6 @@ hadoop-minicluster ${hadoop-two.version} - - org.apache.htrace - htrace-core - commons-httpclient commons-httpclient @@ -2813,10 +2824,6 @@ hadoop-hdfs ${hadoop-three.version} - - org.apache.htrace - htrace-core - com.sun.jersey jersey-core @@ -2862,10 +2869,6 @@ test-jar test - - org.apache.htrace - htrace-core - javax.servlet.jsp jsp-api @@ -2946,10 +2949,6 @@ com.sun.jersey jersey-server - - org.apache.htrace - htrace-core - javax.servlet.jsp jsp-api @@ -3014,10 +3013,6 @@ hadoop-minicluster ${hadoop-three.version} - - org.apache.htrace - htrace-core - commons-httpclient commons-httpclient -- 2.16.1 From cca47afa9834119aa167ee71800843537d4d9e3d Mon Sep 17 00:00:00 2001 From: Sean Busbey Date: Tue, 24 Apr 2018 14:51:12 -0500 Subject: [PATCH 2/4] HBASE-20333 Provide a shaded client that allows downstream to provide Hadoop needs. --- hbase-shaded/hbase-shaded-check-invariants/pom.xml | 5 ++ .../hbase-shaded-client-byo-hadoop/pom.xml | 70 ++++++++++++++++++++++ hbase-shaded/hbase-shaded-client/pom.xml | 35 +++++++++-- hbase-shaded/hbase-shaded-mapreduce/pom.xml | 30 ++-------- hbase-shaded/pom.xml | 6 ++ 5 files changed, 115 insertions(+), 31 deletions(-) create mode 100644 hbase-shaded/hbase-shaded-client-byo-hadoop/pom.xml diff --git a/hbase-shaded/hbase-shaded-check-invariants/pom.xml b/hbase-shaded/hbase-shaded-check-invariants/pom.xml index 7ba4a41782..287a986568 100644 --- a/hbase-shaded/hbase-shaded-check-invariants/pom.xml +++ b/hbase-shaded/hbase-shaded-check-invariants/pom.xml @@ -48,6 +48,11 @@ hbase-shaded-mapreduce ${project.version} + + org.apache.hbase + hbase-shaded-client-byo-hadoop + ${project.version} + com.github.stephenc.findbugs diff --git a/hbase-shaded/hbase-shaded-client-byo-hadoop/pom.xml b/hbase-shaded/hbase-shaded-client-byo-hadoop/pom.xml new file mode 100644 index 0000000000..c51a1af745 --- /dev/null +++ b/hbase-shaded/hbase-shaded-client-byo-hadoop/pom.xml @@ -0,0 +1,70 @@ + + + 4.0.0 + + hbase-shaded + org.apache.hbase + 3.0.0-SNAPSHOT + .. + + hbase-shaded-client-byo-hadoop + Apache HBase - Shaded - Client + + + + org.apache.maven.plugins + maven-site-plugin + + true + + + + + maven-assembly-plugin + + true + + + + + + + org.apache.hbase + hbase-client + + + + + + release + + + + org.apache.maven.plugins + maven-shade-plugin + + + + + + diff --git a/hbase-shaded/hbase-shaded-client/pom.xml b/hbase-shaded/hbase-shaded-client/pom.xml index 72a5b6058e..5ac3ef5adb 100644 --- a/hbase-shaded/hbase-shaded-client/pom.xml +++ b/hbase-shaded/hbase-shaded-client/pom.xml @@ -28,7 +28,7 @@ .. 
hbase-shaded-client - Apache HBase - Shaded - Client + Apache HBase - Shaded - Client (with Hadoop bundled) @@ -51,6 +51,7 @@ org.apache.hbase hbase-client + ${project.version} @@ -59,10 +60,34 @@ release - - org.apache.maven.plugins - maven-shade-plugin - + + org.apache.maven.plugins + maven-shade-plugin + + + aggregate-into-a-jar-with-relocated-third-parties + + + + + + org.apache.hbase:hbase-resource-bundle + org.slf4j:* + com.google.code.findbugs:* + com.github.stephenc.findbugs:* + org.apache.htrace:* + org.apache.yetus:* + log4j:* + commons-logging:* + + + + + + diff --git a/hbase-shaded/hbase-shaded-mapreduce/pom.xml b/hbase-shaded/hbase-shaded-mapreduce/pom.xml index edc80cd151..598f3af938 100644 --- a/hbase-shaded/hbase-shaded-mapreduce/pom.xml +++ b/hbase-shaded/hbase-shaded-mapreduce/pom.xml @@ -169,32 +169,10 @@ release - - - org.apache.maven.plugins - maven-shade-plugin - - - aggregate-into-a-jar-with-relocated-third-parties - - - - org.apache.hadoop:* - - org.apache.hbase:hbase-resource-bundle - org.slf4j:* - com.google.code.findbugs:* - com.github.stephenc.findbugs:* - org.apache.htrace:* - org.apache.yetus:* - log4j:* - commons-logging:* - - - - - - + + org.apache.maven.plugins + maven-shade-plugin + diff --git a/hbase-shaded/pom.xml b/hbase-shaded/pom.xml index 93b122fe08..9eb30e0941 100644 --- a/hbase-shaded/pom.xml +++ b/hbase-shaded/pom.xml @@ -39,6 +39,7 @@ org.apache.hadoop.hbase.shaded + hbase-shaded-client-byo-hadoop hbase-shaded-client hbase-shaded-mapreduce hbase-shaded-check-invariants @@ -131,6 +132,11 @@ false + + org.apache.hadoop:* + org.apache.hbase:hbase-resource-bundle org.slf4j:* com.google.code.findbugs:* -- 2.16.1 From 6b62c0302b03452b1ca4f8f2389765f88c61a87b Mon Sep 17 00:00:00 2001 From: Sean Busbey Date: Fri, 18 May 2018 11:11:42 -0500 Subject: [PATCH 3/4] HBASE-20615 ship shaded client artifacts in binary tarball. 
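The hbase-shaded-client, hbase-shaded-client-byo-hadoop, and hbase-shaded-mapreduce artifacts become dependencies of hbase-assembly; the assembly descriptor excludes them from the main lib/ directory and ships them under lib/shaded-clients/ instead.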
--- hbase-assembly/pom.xml | 16 ++++++++++++++ .../src/main/assembly/hadoop-two-compat.xml | 25 ++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/hbase-assembly/pom.xml b/hbase-assembly/pom.xml index 5da105b24e..acb6f53ed3 100644 --- a/hbase-assembly/pom.xml +++ b/hbase-assembly/pom.xml @@ -189,6 +189,22 @@ + + + org.apache.hbase + hbase-shaded-client + ${project.version} + + + org.apache.hbase + hbase-shaded-client-byo-hadoop + ${project.version} + + + org.apache.hbase + hbase-shaded-mapreduce + ${project.version} + org.apache.hbase diff --git a/hbase-assembly/src/main/assembly/hadoop-two-compat.xml b/hbase-assembly/src/main/assembly/hadoop-two-compat.xml index 69a800bc2a..6ab621a054 100644 --- a/hbase-assembly/src/main/assembly/hadoop-two-compat.xml +++ b/hbase-assembly/src/main/assembly/hadoop-two-compat.xml @@ -73,11 +73,36 @@ com.sun.jersey:* com.sun.jersey.contribs:* jline:jline + org.apache.hbase:hbase-shaded-client + org.apache.hbase:hbase-shaded-client-byo-hadoop + org.apache.hbase:hbase-shaded-mapreduce + + + true + + org.apache.hbase:hbase-shaded-client + org.apache.hbase:hbase-shaded-mapreduce + org.apache.hbase:hbase-shaded-client-byo-hadoop + + + lib/shaded-clients + false + + + + org.apache.hbase:hbase-shaded-client + org.apache.hbase:hbase-shaded-mapreduce + org.apache.hbase:hbase-shaded-client-byo-hadoop + + + + + -- 2.16.1 From 0555068c352517e02ef5b172580d886de765659a Mon Sep 17 00:00:00 2001 From: Sean Busbey Date: Tue, 1 May 2018 14:28:52 -0500 Subject: [PATCH 4/4] HBASE-20334 add a test that verifies basic client and MR integration --- .../hbase_nightly_pseudo-distributed-test.sh | 481 +++++++++++++++++++++ 1 file changed, 481 insertions(+) create mode 100755 dev-support/hbase_nightly_pseudo-distributed-test.sh diff --git a/dev-support/hbase_nightly_pseudo-distributed-test.sh b/dev-support/hbase_nightly_pseudo-distributed-test.sh new file mode 100755 index 0000000000..11fa523bfe --- /dev/null +++ b/dev-support/hbase_nightly_pseudo-distributed-test.sh @@ -0,0 +1,481 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e +function usage { + echo "Usage: ${0} [options] /path/to/component/bin-install /path/to/hadoop/executable /path/to/hadoop/hadoop-yarn-server-tests-tests.jar /path/to/hadoop/hadoop-mapreduce-client-jobclient-tests.jar" + echo "" + echo " --zookeeper-data /path/to/use Where the embedded zookeeper instance should write its data." + echo " defaults to 'zk-data' in the working-dir." + echo " --working-dir /path/to/use Path for writing configs and logs. must exist." + echo " defaults to making a directory via mktemp." + echo " --hadoop-client-classpath /path/to/some.jar:/path/to/another.jar classpath for hadoop jars." 
+ echo " defaults to 'hadoop classpath'" + echo " --force-data-clean Delete all data in HDFS and ZK prior to starting up hbase" + echo " --single-process Run as single process instead of pseudo-distributed" + echo "" + exit 1 +} +# if no args specified, show usage +if [ $# -lt 4 ]; then + usage +fi + +# Get arguments +declare component_install +declare hadoop_exec +declare working_dir +declare zk_data_dir +declare clean +declare distributed="true" +declare hadoop_jars +while [ $# -gt 0 ] +do + case "$1" in + --working-dir) shift; working_dir=$1; shift;; + --force-data-clean) shift; clean="true";; + --zookeeper-data) shift; zk_data_dir=$1; shift;; + --single-process) shift; distributed="false";; + --hadoop-client-classpath) shift; hadoop_jars="$1"; shift;; + --) shift; break;; + -*) usage ;; + *) break;; # terminate while loop + esac +done + +# should still have where component checkout is. +if [ $# -lt 4 ]; then + usage +fi +component_install="$(cd "$(dirname "$1")"; pwd)/$(basename "$1")" +hadoop_exec="$2" +yarn_server_tests_test_jar="$3" +mapred_jobclient_test_jar="$4" + +if [ ! -x "${hadoop_exec}" ]; then + echo "hadoop cli does not appear to be executable." >&2 + exit 1 +fi + +if [ ! -d "${component_install}" ]; then + echo "Path to HBase binary install should be a directory." >&2 + exit 1 +fi + +if [ -z "${working_dir}" ]; then + if ! working_dir="$(mktemp -d -t hbase-pseudo-dist-test)" ; then + echo "Failed to create temporary working directory. Please specify via --working-dir" >&2 + exit 1 + fi +else + # absolutes please + working_dir="$(cd "$(dirname "${working_dir}")"; pwd)/$(basename "${working_dir}")" + if [ ! -d "${working_dir}" ]; then + echo "passed working directory '${working_dir}' must already exist." >&2 + exit 1 + fi +fi + +if [ -z "${zk_data_dir}" ]; then + zk_data_dir="${working_dir}/zk-data" + mkdir "${zk_data_dir}" +else + # absolutes please + zk_data_dir="$(cd "$(dirname "${zk_data_dir}")"; pwd)/$(basename "${zk_data_dir}")" + if [ ! -d "${zk_data_dir}" ]; then + echo "passed directory for unpacking the source tarball '${zk_data_dir}' must already exist." + exit 1 + fi +fi + +echo "You'll find logs and temp files in ${working_dir}" + +function redirect_and_run { + log_base=$1 + shift + echo "$*" >"${log_base}.err" + "$@" >"${log_base}.out" 2>>"${log_base}.err" +} + +(cd "${working_dir}" + +echo "Hadoop version information:" +"${hadoop_exec}" version +hadoop_version=$("${hadoop_exec}" version | head -n 1) +hadoop_version="${hadoop_version#Hadoop }" +if [ "${hadoop_version%.*.*}" -gt 2 ]; then + "${hadoop_exec}" envvars +else + echo "JAVA_HOME: ${JAVA_HOME}" +fi + +# Ensure that if some other Hadoop install happens to be present in the environment we ignore it. +HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP="true" +export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP + +if [ -n "${clean}" ]; then + echo "Cleaning out ZooKeeper..." + rm -rf "${zk_data_dir:?}/*" +fi + +echo "HBase version information:" +"${component_install}/bin/hbase" version 2>/dev/null +hbase_version=$("${component_install}/bin/hbase" version | head -n 1 2>/dev/null) +hbase_version="${hbase_version#HBase }" + +if [ ! -s "${component_install}/lib/shaded-clients/hbase-shaded-mapreduce-${hbase_version}.jar" ]; then + echo "HBase binary install doesn't appear to include a shaded mapreduce artifact." >&2 + exit 1 +fi + +if [ ! -s "${component_install}/lib/shaded-clients/hbase-shaded-client-${hbase_version}.jar" ]; then + echo "HBase binary install doesn't appear to include a shaded client artifact." 
>&2 + exit 1 +fi + +if [ ! -s "${component_install}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar" ]; then + echo "HBase binary install doesn't appear to include a shaded client artifact." >&2 + exit 1 +fi + +echo "Writing out configuration for HBase." +rm -rf "${working_dir}/hbase-conf" +mkdir "${working_dir}/hbase-conf" + +if [ -f "${component_install}/conf/log4j.properties" ]; then + cp "${component_install}/conf/log4j.properties" "${working_dir}/hbase-conf/log4j.properties" +else + cat >"${working_dir}/hbase-conf/log4j.properties" <"${working_dir}/hbase-conf/hbase-site.xml" < + + + + + hbase.rootdir + + /hbase + + + hbase.zookeeper.property.dataDir + ${zk_data_dir} + + + hbase.cluster.distributed + ${distributed} + + +EOF + +if [ "true" = "${distributed}" ]; then + cat >"${working_dir}/hbase-conf/regionservers" <"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err" & +echo "$!" > "${working_dir}/hadoop.pid" + +sleep_time=2 +until [ -s "${working_dir}/hbase-conf/core-site.xml" ]; do + printf '\twaiting for Hadoop to finish starting up.\n' + sleep "${sleep_time}" + sleep_time="$((sleep_time*2))" +done + +if [ "${hadoop_version%.*.*}" -gt 2 ]; then + echo "Verifying configs" + "${hadoop_exec}" --config "${working_dir}/hbase-conf/" conftest +fi + +if [ -z "${hadoop_jars}" ]; then + echo "Hadoop client jars not given; getting them from 'hadoop classpath'" + hadoop_jars=$("${hadoop_exec}" --config "${working_dir}/hbase-conf/" classpath) +fi + +if [ -n "${clean}" ]; then + echo "Cleaning out HDFS..." + "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r /hbase + "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r example/ + "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r example-region-listing.data +fi + +echo "Listing HDFS contents" +redirect_and_run "${working_dir}/hadoop_cluster_smoke" \ + "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -ls -R / + +echo "Starting up HBase" +HBASE_CONF_DIR="${working_dir}/hbase-conf/" "${component_install}/bin/start-hbase.sh" + +sleep_time=2 +until "${component_install}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/waiting_hbase_startup.log" 2>&1 <"${working_dir}/table_create.log" 2>&1 < 1000, SPLITALGO => 'UniformSplit'} +EOF + +echo "writing out example TSV to example.tsv" +cat >"${working_dir}/example.tsv" <"${working_dir}/scan_import.out" 2>"${working_dir}/scan_import.err" </dev/null | tail -n 1) +if [ ! "${import_rowcount}" -eq 48 ]; then + echo "ERROR: Instead of finding 48 rows, we found ${import_rowcount}." + exit 2 +fi + +echo "Building shaded client example." 
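+# The example below is compiled against just the shaded byo-hadoop client jar plus the Hadoop jars resolved earlier, since exercising that client/Hadoop split is the point of this test.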
+cat >"${working_dir}/HBaseClientReadWriteExample.java" < regions = new LinkedList<>(); + try (Admin admin = connection.getAdmin()) { + final ClusterMetrics cluster = admin.getClusterMetrics(); + System.out.println(String.format("\tCluster reports version %s, ave load %f, region count %d", cluster.getHBaseVersion(), cluster.getAverageLoad(), cluster.getRegionCount())); + for (ServerMetrics server : cluster.getLiveServerMetrics().values()) { + for (RegionMetrics region : server.getRegionMetrics().values()) { + regions.add(region.getNameAsString()); + } + } + } + final Path listing = new Path("example-region-listing.data"); + System.out.println("Writing list to HDFS"); + try (FileSystem fs = FileSystem.newInstance(hadoop)) { + final Path path = fs.makeQualified(listing); + try (FSDataOutputStream out = fs.create(path)) { + out.writeInt(regions.size()); + for (String region : regions) { + out.writeUTF(region); + } + out.hsync(); + } + } + final List puts = new LinkedList<>(); + final Put marker = new Put(new byte[] { (byte)0 }); + System.out.println("Reading list from HDFS"); + try (FileSystem fs = FileSystem.newInstance(hadoop)) { + final Path path = fs.makeQualified(listing); + final CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY); + try (FSDataInputStream in = fs.open(path)) { + final int count = in.readInt(); + marker.addColumn(FAMILY_BYTES, Bytes.toBytes("count"), Bytes.toBytes(count)); + for(int i = 0; i < count; i++) { + builder.clear(); + final byte[] row = Bytes.toBytes(in.readUTF()); + final Put put = new Put(row); + builder.setRow(row); + builder.setFamily(FAMILY_BYTES); + builder.setType(Cell.Type.Put); + put.add(builder.build()); + puts.add(put); + } + } + } + System.out.println("Writing list into HBase table"); + try (Table table = connection.getTable(TableName.valueOf("test:example"))) { + table.put(marker); + table.put(puts); + } + } + } +} +EOF +redirect_and_run "${working_dir}/hbase-shaded-client-compile" \ + javac -cp "${component_install}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hadoop_jars}" "${working_dir}/HBaseClientReadWriteExample.java" +echo "Running shaded client example. It'll fetch the set of regions, round-trip them to a file in HDFS, then write them one-per-row into the test table." +# The order of classpath entries here is important. if we're using non-shaded Hadoop 3 / 2.9.0 jars, we have to work around YARN-2190. +redirect_and_run "${working_dir}/hbase-shaded-client-example" \ + java -cp "${working_dir}/hbase-conf/:${component_install}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hbase_dep_classpath}:${working_dir}:${hadoop_jars}" HBaseClientReadWriteExample + +echo "Checking on results of example program." +"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -copyToLocal "example-region-listing.data" "${working_dir}/example-region-listing.data" + +"${component_install}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/scan_example.out" 2>"${working_dir}/scan_example.err" </dev/null | tail -n 1) +if [ "${example_rowcount}" -gt "1050" ]; then + echo "Found ${example_rowcount} rows, which is enough to cover 48 for import, 1000 example's use of user table regions, 2 for example's use of meta/root regions, and 1 for example's count record" +else + echo "ERROR: Only found ${example_rowcount} rows." +fi + +) -- 2.16.1