From 4881b13342b61e1e2cd39a572237d36f16dc7abe Mon Sep 17 00:00:00 2001 From: Sean Busbey Date: Mon, 9 Apr 2018 13:37:44 -0500 Subject: [PATCH] HBASE-20332 shaded mapreduce module shouldn't include hadoop * modify the jar checking script to take args; make hadoop stuff optional * separate out checking the artifacts that have hadoop vs those that don't. * * Unfortunately means we need two modules for checking things * * put in a safety check that the support script for checking jar contents is maintained in both modules * move hadoop deps for the mapreduce module to provided. we should be getting stuff from hadoop at runtime for the non-shaded artifact as well. * * have to carve out an exception for o.a.hadoop.metrics2. :( * fix duplicated class warning --- hbase-mapreduce/pom.xml | 21 ++ hbase-shaded/hbase-shaded-check-invariants/pom.xml | 52 +++-- .../resources/ensure-jars-have-correct-contents.sh | 92 +++++++-- hbase-shaded/hbase-shaded-mapreduce/pom.xml | 183 +++++++++++++++++- .../pom.xml | 215 +++++++++++++++++++++ .../resources/ensure-jars-have-correct-contents.sh | 129 +++++++++++++ hbase-shaded/pom.xml | 12 ++ 7 files changed, 668 insertions(+), 36 deletions(-) create mode 100644 hbase-shaded/hbase-shaded-with-hadoop-check-invariants/pom.xml create mode 100644 hbase-shaded/hbase-shaded-with-hadoop-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh diff --git a/hbase-mapreduce/pom.xml b/hbase-mapreduce/pom.xml index af80737644..06144a678e 100644 --- a/hbase-mapreduce/pom.xml +++ b/hbase-mapreduce/pom.xml @@ -193,9 +193,11 @@ test-jar test + org.apache.hbase hbase-server + provided org.apache.hbase @@ -280,9 +282,17 @@ findbugs-annotations true + org.apache.hadoop hadoop-common + provided org.apache.htrace @@ -333,6 +343,7 @@ org.apache.hadoop hadoop-hdfs + provided org.apache.htrace @@ -364,6 +375,7 @@ org.apache.hadoop hadoop-mapreduce-client-core + provided com.google.guava @@ -414,13 +426,22 @@ ${hadoop-three.version} + org.apache.hadoop hadoop-common + provided org.apache.hadoop hadoop-hdfs + provided diff --git a/hbase-shaded/hbase-shaded-check-invariants/pom.xml b/hbase-shaded/hbase-shaded-check-invariants/pom.xml index 7322769f0b..83a917b799 100644 --- a/hbase-shaded/hbase-shaded-check-invariants/pom.xml +++ b/hbase-shaded/hbase-shaded-check-invariants/pom.xml @@ -34,11 +34,15 @@ - - org.apache.hbase - hbase-shaded-client - ${project.version} - + org.apache.hbase hbase-shaded-mapreduce @@ -113,6 +117,8 @@ com.github.stephenc.findbugs:* org.apache.htrace:* + + org.apache.hadoop:* @@ -158,18 +164,37 @@ - org.codehaus.mojo exec-maven-plugin 1.6.0 + + + make-sure-validation-files-are-in-sync + validate + + exec + + + diff + false + + ../hbase-shaded-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh + ../hbase-shaded-with-hadoop-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh + + + + check-jar-contents integration-test @@ -180,6 +205,9 @@ ${shell-executable} ${project.build.testOutputDirectory} false + ensure-jars-have-correct-contents.sh ${hbase-client-artifacts} diff --git a/hbase-shaded/hbase-shaded-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh b/hbase-shaded/hbase-shaded-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh index 8bda8ce953..a2bb332e3c 100644 --- a/hbase-shaded/hbase-shaded-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh +++ 
b/hbase-shaded/hbase-shaded-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh @@ -15,33 +15,67 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Usage: $0 [/path/to/some/example.jar:/path/to/another/example/created.jar] -# -# accepts a single command line argument with a colon separated list of -# paths to jars to check. Iterates through each such passed jar and checks -# all the contained paths to make sure they follow the below constructed -# safe list. +set -e +function usage { + echo "Usage: ${0} [options] [/path/to/some/example.jar:/path/to/another/example/created.jar]" + echo "" + echo " accepts a single command line argument with a colon separated list of" + echo " paths to jars to check. Iterates through each such passed jar and checks" + echo " all the contained paths to make sure they follow the below constructed" + echo " safe list." + echo "" + echo " --allow-hadoop Include stuff from the Apache Hadoop project in the list" + echo " of allowed jar contents. default: false" + echo " --debug print more info to stderr" + exit 1 +} +# if no args specified, show usage +if [ $# -lt 1 ]; then + usage +fi + +# Get arguments +declare allow_hadoop +declare debug +while [ $# -gt 0 ] +do + case "$1" in + --allow-hadoop) shift; allow_hadoop="true";; + --debug) shift; debug="true";; + --) shift; break;; + -*) usage ;; + *) break;; # terminate while loop + esac +done + +# should still have jars to check. +if [ $# -lt 1 ]; then + usage +fi +if [ -n "${debug}" ]; then + echo "[DEBUG] Checking on jars: $@" >&2 + echo "jar command is: $(which jar)" >&2 + echo "grep command is: $(which grep)" >&2 + grep -V >&2 || true +fi + +IFS=: read -r -d '' -a artifact_list < <(printf '%s\0' "$1") -# we have to allow the directories that lead to the org/apache/hadoop dir -allowed_expr="(^org/$|^org/apache/$" +# we have to allow the directories that lead to the hbase dirs +allowed_expr="(^org/$|^org/apache/$|^org/apache/hadoop/$" # We allow the following things to exist in our client artifacts: -# * classes in packages that start with org.apache.hadoop, which by -# convention should be in a path that looks like org/apache/hadoop -allowed_expr+="|^org/apache/hadoop/" +# * classes in packages that start with org.apache.hadoop.hbase, which by +# convention should be in a path that looks like org/apache/hadoop/hbase +allowed_expr+="|^org/apache/hadoop/hbase" # * classes in packages that start with org.apache.hbase allowed_expr+="|^org/apache/hbase/" # * whatever in the "META-INF" directory allowed_expr+="|^META-INF/" # * the folding tables from jcodings allowed_expr+="|^tables/" -# * Hadoop's and HBase's default configuration files, which have the form +# * HBase's default configuration files, which have the form # "_module_-default.xml" -allowed_expr+="|^[^-]*-default.xml$" -# * Hadoop's versioning properties files, which have the form -# "_module_-version-info.properties" -allowed_expr+="|^[^-]*-version-info.properties$" -# * Hadoop's application classloader properties file. 
-allowed_expr+="|^org.apache.hadoop.application-classloader.properties$" +allowed_expr+="|^hbase-default.xml$" # public suffix list used by httpcomponents allowed_expr+="|^mozilla/$" allowed_expr+="|^mozilla/public-suffix-list.txt$" @@ -51,12 +85,30 @@ allowed_expr+="|^properties.dtd$" allowed_expr+="|^PropertyList-1.0.dtd$" +if [ -n "${allow_hadoop}" ]; then + # * classes in packages that start with org.apache.hadoop, which by + # convention should be in a path that looks like org/apache/hadoop + allowed_expr+="|^org/apache/hadoop/" + # * Hadoop's default configuration files, which have the form + # "_module_-default.xml" + allowed_expr+="|^[^-]*-default.xml$" + # * Hadoop's versioning properties files, which have the form + # "_module_-version-info.properties" + allowed_expr+="|^[^-]*-version-info.properties$" + # * Hadoop's application classloader properties file. + allowed_expr+="|^org.apache.hadoop.application-classloader.properties$" +else + # We have some classes for integrating with the Hadoop Metrics2 system + # that have to be in a particular package space due to access rules. + allowed_expr+="|^org/apache/hadoop/metrics2" +fi + + allowed_expr+=")" declare -i bad_artifacts=0 declare -a bad_contents -IFS=: read -r -d '' -a artifact_list < <(printf '%s\0' "$1") for artifact in "${artifact_list[@]}"; do - bad_contents=($(jar tf "${artifact}" | grep -v -E "${allowed_expr}")) + bad_contents=($(jar tf "${artifact}" | grep -v -E "${allowed_expr}" || true)) if [ ${#bad_contents[@]} -gt 0 ]; then echo "[ERROR] Found artifact with unexpected contents: '${artifact}'" echo " Please check the following and either correct the build or update" diff --git a/hbase-shaded/hbase-shaded-mapreduce/pom.xml b/hbase-shaded/hbase-shaded-mapreduce/pom.xml index cfcc357877..f5a073ab07 100644 --- a/hbase-shaded/hbase-shaded-mapreduce/pom.xml +++ b/hbase-shaded/hbase-shaded-mapreduce/pom.xml @@ -62,6 +62,10 @@ + org.apache.hbase hbase-mapreduce @@ -158,12 +162,183 @@ release - - org.apache.maven.plugins - maven-shade-plugin - + + + org.apache.maven.plugins + maven-shade-plugin + + + aggregate-into-a-jar-with-relocated-third-parties + + + + org.apache.hadoop:* + + org.apache.hbase:hbase-resource-bundle + org.slf4j:* + com.google.code.findbugs:* + com.github.stephenc.findbugs:* + org.apache.htrace:* + org.apache.yetus:* + log4j:* + commons-logging:* + + + + + + + + + + hadoop-2.0 + + + + !hadoop.profile + + + + + org.apache.hadoop + hadoop-common + provided + + + org.apache.htrace + htrace-core + + + net.java.dev.jets3t + jets3t + + + javax.servlet.jsp + jsp-api + + + org.mortbay.jetty + jetty + + + com.sun.jersey + jersey-server + + + com.sun.jersey + jersey-core + + + com.sun.jersey + jersey-json + + + javax.servlet + servlet-api + + + tomcat + jasper-compiler + + + tomcat + jasper-runtime + + + com.google.code.findbugs + jsr305 + + + + + org.apache.hadoop + hadoop-hdfs + provided + + + org.apache.htrace + htrace-core + + + javax.servlet.jsp + jsp-api + + + javax.servlet + servlet-api + + + io.netty + netty + + + stax + stax-api + + + xerces + xercesImpl + + + ${hadoop-two.version} + + + org.apache.hadoop + hadoop-mapreduce-client-core + provided + + + com.google.guava + guava + + + + + org.apache.hadoop + hadoop-auth + provided + + + + + + + hadoop-3.0 + + + hadoop.profile + 3.0 + + + + ${hadoop-three.version} + + + + org.apache.hadoop + hadoop-common + provided + + + org.apache.hadoop + hadoop-hdfs + provided + + + org.apache.hadoop + hadoop-auth + provided + + + diff --git 
a/hbase-shaded/hbase-shaded-with-hadoop-check-invariants/pom.xml b/hbase-shaded/hbase-shaded-with-hadoop-check-invariants/pom.xml new file mode 100644 index 0000000000..f663c1ac8a --- /dev/null +++ b/hbase-shaded/hbase-shaded-with-hadoop-check-invariants/pom.xml @@ -0,0 +1,215 @@ + + + + 4.0.0 + + hbase + org.apache.hbase + 3.0.0-SNAPSHOT + ../.. + + hbase-shaded-with-hadoop-check-invariants + pom + + + Enforces our invariants for our shaded artifacts. e.g. shaded clients have + a specific set of transitive dependencies and shaded clients only contain + classes that are in particular packages. Does the enforcement through + the maven-enforcer-plugin and and integration test. + + Apache HBase Shaded Packaging Invariants (with Hadoop bundled) + + + + + + + + org.apache.hbase + hbase-shaded-client + ${project.version} + + + + com.github.stephenc.findbugs + findbugs-annotations + provided + + + log4j + log4j + provided + + + + junit + junit + provided + + + org.mockito + mockito-core + provided + + + + + + org.apache.maven.plugins + maven-site-plugin + + true + + + + org.apache.maven.plugins + maven-enforcer-plugin + + + org.codehaus.mojo + extra-enforcer-rules + 1.0-beta-6 + + + + + enforce-banned-dependencies + + enforce + + + true + + + + + + org.slf4j:* + log4j:* + commons-logging:* + + com.google.code.findbugs:* + com.github.stephenc.findbugs:* + + org.apache.htrace:* + + + + + true + + + + + + + + org.apache.maven.plugins + maven-resources-plugin + + + test-resources + pre-integration-test + + testResources + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + put-client-artifacts-in-a-property + pre-integration-test + + build-classpath + + + provided + true + hbase-client-artifacts + + + + + + org.codehaus.mojo + exec-maven-plugin + 1.6.0 + + + + make-sure-validation-files-are-in-sync + validate + + exec + + + diff + false + + ../hbase-shaded-with-hadoop-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh + ../hbase-shaded-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh + + + + + + check-jar-contents-for-stuff-with-hadoop + integration-test + + exec + + + ${shell-executable} + ${project.build.testOutputDirectory} + false + + ensure-jars-have-correct-contents.sh + --allow-hadoop + ${hbase-client-artifacts} + + + + + + + + + diff --git a/hbase-shaded/hbase-shaded-with-hadoop-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh b/hbase-shaded/hbase-shaded-with-hadoop-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh new file mode 100644 index 0000000000..a2bb332e3c --- /dev/null +++ b/hbase-shaded/hbase-shaded-with-hadoop-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -e +function usage { + echo "Usage: ${0} [options] [/path/to/some/example.jar:/path/to/another/example/created.jar]" + echo "" + echo " accepts a single command line argument with a colon separated list of" + echo " paths to jars to check. Iterates through each such passed jar and checks" + echo " all the contained paths to make sure they follow the below constructed" + echo " safe list." + echo "" + echo " --allow-hadoop Include stuff from the Apache Hadoop project in the list" + echo " of allowed jar contents. default: false" + echo " --debug print more info to stderr" + exit 1 +} +# if no args specified, show usage +if [ $# -lt 1 ]; then + usage +fi + +# Get arguments +declare allow_hadoop +declare debug +while [ $# -gt 0 ] +do + case "$1" in + --allow-hadoop) shift; allow_hadoop="true";; + --debug) shift; debug="true";; + --) shift; break;; + -*) usage ;; + *) break;; # terminate while loop + esac +done + +# should still have jars to check. +if [ $# -lt 1 ]; then + usage +fi +if [ -n "${debug}" ]; then + echo "[DEBUG] Checking on jars: $@" >&2 + echo "jar command is: $(which jar)" >&2 + echo "grep command is: $(which grep)" >&2 + grep -V >&2 || true +fi + +IFS=: read -r -d '' -a artifact_list < <(printf '%s\0' "$1") + +# we have to allow the directories that lead to the hbase dirs +allowed_expr="(^org/$|^org/apache/$|^org/apache/hadoop/$" +# We allow the following things to exist in our client artifacts: +# * classes in packages that start with org.apache.hadoop.hbase, which by +# convention should be in a path that looks like org/apache/hadoop/hbase +allowed_expr+="|^org/apache/hadoop/hbase" +# * classes in packages that start with org.apache.hbase +allowed_expr+="|^org/apache/hbase/" +# * whatever in the "META-INF" directory +allowed_expr+="|^META-INF/" +# * the folding tables from jcodings +allowed_expr+="|^tables/" +# * HBase's default configuration files, which have the form +# "_module_-default.xml" +allowed_expr+="|^hbase-default.xml$" +# public suffix list used by httpcomponents +allowed_expr+="|^mozilla/$" +allowed_expr+="|^mozilla/public-suffix-list.txt$" +# Comes from commons-configuration, not sure if relocatable. +allowed_expr+="|^digesterRules.xml$" +allowed_expr+="|^properties.dtd$" +allowed_expr+="|^PropertyList-1.0.dtd$" + + +if [ -n "${allow_hadoop}" ]; then + # * classes in packages that start with org.apache.hadoop, which by + # convention should be in a path that looks like org/apache/hadoop + allowed_expr+="|^org/apache/hadoop/" + # * Hadoop's default configuration files, which have the form + # "_module_-default.xml" + allowed_expr+="|^[^-]*-default.xml$" + # * Hadoop's versioning properties files, which have the form + # "_module_-version-info.properties" + allowed_expr+="|^[^-]*-version-info.properties$" + # * Hadoop's application classloader properties file. + allowed_expr+="|^org.apache.hadoop.application-classloader.properties$" +else + # We have some classes for integrating with the Hadoop Metrics2 system + # that have to be in a particular package space due to access rules. 
+ allowed_expr+="|^org/apache/hadoop/metrics2" +fi + + +allowed_expr+=")" +declare -i bad_artifacts=0 +declare -a bad_contents +for artifact in "${artifact_list[@]}"; do + bad_contents=($(jar tf "${artifact}" | grep -v -E "${allowed_expr}" || true)) + if [ ${#bad_contents[@]} -gt 0 ]; then + echo "[ERROR] Found artifact with unexpected contents: '${artifact}'" + echo " Please check the following and either correct the build or update" + echo " the allowed list with reasoning." + echo "" + for bad_line in "${bad_contents[@]}"; do + echo " ${bad_line}" + done + bad_artifacts=${bad_artifacts}+1 + else + echo "[INFO] Artifact looks correct: '$(basename "${artifact}")'" + fi +done + +# if there was atleast one bad artifact, exit with failure +if [ "${bad_artifacts}" -gt 0 ]; then + exit 1 +fi diff --git a/hbase-shaded/pom.xml b/hbase-shaded/pom.xml index 24c515844e..dbfc2e08bb 100644 --- a/hbase-shaded/pom.xml +++ b/hbase-shaded/pom.xml @@ -42,6 +42,7 @@ hbase-shaded-client hbase-shaded-mapreduce hbase-shaded-check-invariants + hbase-shaded-with-hadoop-check-invariants @@ -118,6 +119,7 @@ maven-shade-plugin + aggregate-into-a-jar-with-relocated-third-parties package shade @@ -449,6 +451,16 @@ META-INF/ECLIPSEF.RSA + + + commons-beanutils:commons-beanutils-core + + org/apache/commons/collections/*.class + + org.apache.hbase:hbase-server -- 2.16.1