From a199cb58df28f4e2749b69d0e676cefadacdd63f Mon Sep 17 00:00:00 2001
From: Sean Mackrory
Date: Wed, 22 May 2019 12:30:28 -0400
Subject: [PATCH] HBASE-22437 HBOSS: Add Hadoop 2 / 3 profiles.

Signed-off-by: Josh Elser
---
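Note on the compatibility technique in the AutoLock hunk below (this sketch
is illustrative only and is not part of the patch): Hadoop 2's
FSDataOutputStream.getPos() declares "throws IOException" while Hadoop 3's
does not, and Java rejects a catch block for a checked exception that the
try body cannot throw. Catching Exception is always legal, so the same
source compiles against both lines. A minimal compilable sketch of the
pattern, with hypothetical names:

    import java.io.IOException;

    class GetPosSketch {
      // Stand-in for the Hadoop 2 signature; the Hadoop 3 equivalent
      // declares no checked exception.
      interface StreamWithPos {
        long getPos() throws IOException;
      }

      private final StreamWithPos stream;

      GetPosSketch(StreamWithPos stream) {
        this.stream = stream;
      }

      public long getPos() {
        try {
          return stream.getPos();
        } catch (Exception e) {
          // Catching IOException here would not compile against a
          // dependency whose getPos() throws no checked exception;
          // catching Exception is permitted either way.
          throw new RuntimeException(e);
        }
      }
    }

The AutoLock.getPos() change below uses exactly this trade-off: an unchecked
rethrow in exchange for a single source tree that builds against both lines.
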
 hbase-oss/README.md                           | 28 +++++++++--
 .../hadoop/hbase/oss/sync/AutoLock.java       | 13 ++++-
 .../hbase/oss/contract/TestHBOSSContract.java | 31 ++++++++++--
 pom.xml                                       | 50 ++++++++++++++++++-
 4 files changed, 111 insertions(+), 11 deletions(-)

diff --git a/hbase-oss/README.md b/hbase-oss/README.md
index 1d100af..3b71ee9 100644
--- a/hbase-oss/README.md
+++ b/hbase-oss/README.md
@@ -76,12 +76,20 @@ use and as such there's a dependency on DynamoDB anyway.
 ## Storage Implementations
 
 Currently HBOSS is primarily designed for and exclusively tested with Hadoop's
-s3a client against Amazon S3. S3Guard must be enabled. Both this requirement and
-the use of an external data store for locking have serious implications if any
-other client accesses the same data.
+s3a client against Amazon S3. *S3Guard must be enabled, which is available in
+Hadoop 2.9.0, 3.0.0, and higher*.
+
+Both the use of S3Guard and the use of ZooKeeper for locking have
+implications for other clients that are not configured to share the same
+metadata store and ZooKeeper ensemble. Ideally, all clients should have the
+same configuration in these respects. Read-only clients need not share these
+resources with the HBase processes, but they will not have the added safety
+provided by these features. Clients that do not share these resources and
+modify data can compromise the correctness of HBase.
 
 In theory, HBOSS could also work well with Google's cloud storage client (gs)
-or other object storage clients.
+or other object storage clients, but this has not been tested.
 
 ## FileSystem Instantiation
 
@@ -121,3 +129,15 @@
 other storage in src/test/resources/core-site.xml. Any required credentials or
 other individual configuration should be set in src/test/resources/auth-keys.xml,
 which should be ignored by source control.
+
+### Hadoop Versions
+
+There are Maven profiles defined for the Hadoop 2 and Hadoop 3 major
+versions. These are activated via the property `hadoop.profile`. Each profile
+selects a specific Hadoop release within that major line, as defined by the
+properties `hadoop2.version` and `hadoop3.version`. The build defaults to
+Hadoop 3.
+
+    mvn verify                    # Defaults to Hadoop 3
+    mvn verify -Dhadoop.profile=3 # Activate Hadoop 3
+    mvn verify -Dhadoop.profile=2 # Activate Hadoop 2
diff --git a/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/sync/AutoLock.java b/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/sync/AutoLock.java
index 3b57d20..18eb8cf 100644
--- a/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/sync/AutoLock.java
+++ b/hbase-oss/src/main/java/org/apache/hadoop/hbase/oss/sync/AutoLock.java
@@ -131,10 +131,19 @@ public interface AutoLock extends AutoCloseable {
      * Returns the position in the wrapped stream. This should not be accessed
      * after the stream has been closed. Unlike most other functions in this
      * class, this is not enforced because this function shouldn't throw
-     * IOExceptions.
+     * IOExceptions in Hadoop 3.
+     *
+     * FSDataOutputStream.getPos() declares that it can throw IOExceptions in
+     * Hadoop 2, but the implementation never does. So it could, in theory,
+     * but no situation in which it actually would is known.
      */
     public long getPos() {
-      return stream.getPos();
+      try {
+        return stream.getPos();
+      } catch (Exception e) {
+        // We can't specify IOException and still compile against Hadoop 3
+        throw new RuntimeException(e);
+      }
     }
 
     @Override
diff --git a/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/contract/TestHBOSSContract.java b/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/contract/TestHBOSSContract.java
index 1ba31f9..6ad69ca 100644
--- a/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/contract/TestHBOSSContract.java
+++ b/hbase-oss/src/test/java/org/apache/hadoop/hbase/oss/contract/TestHBOSSContract.java
@@ -18,13 +18,14 @@
 
 package org.apache.hadoop.hbase.oss.contract;
 
+import java.lang.reflect.Method;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystemContractBaseTest;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.oss.HBaseObjectStoreSemantics;
 import org.apache.hadoop.hbase.oss.TestUtils;
-import org.junit.Assume;
 import org.junit.Assert;
+import org.junit.Assume;
 import org.junit.Before;
 import org.junit.Rule;
 import org.junit.Test;
@@ -65,7 +66,11 @@ public class TestHBOSSContract extends FileSystemContractBaseTest {
   @Test
   public void testMkdirsWithUmask() throws Exception {
     // Skipped in the hadoop-aws tests
-    Assume.assumeFalse(TestUtils.fsIs(TestUtils.S3A, conf));
+    if (TestUtils.fsIs(TestUtils.S3A, conf)) {
+      // It would be nice to use Assume.assumeFalse instead of if, but Hadoop 2
+      // builds pull in JUnit 3, and this is the only way to skip the test.
+      return;
+    }
     super.testMkdirsWithUmask();
   }
 
@@ -100,7 +105,25 @@ public class TestHBOSSContract extends FileSystemContractBaseTest {
   @Test
   public void testMoveDirUnderParent() throws Throwable {
     // Skipped in the hadoop-aws tests
-    Assume.assumeFalse(TestUtils.fsIs(TestUtils.S3A, conf));
-    super.testMoveDirUnderParent();
+    if (TestUtils.fsIs(TestUtils.S3A, conf)) {
+      // It would be nice to use Assume.assumeFalse instead of if, but Hadoop 2
+      // builds pull in JUnit 3, and this is the only way to skip the test.
+      return;
+    }
+
+    // Can't just call super.testMoveDirUnderParent() because it doesn't
+    // exist in older Hadoop versions
+    String methodName = "testMoveDirUnderParent";
+    Method method = null;
+    boolean skip = false;
+    try {
+      method = super.getClass().getMethod(methodName, (Class[]) null);
+    } catch (NoSuchMethodException e) {
+      skip = true;
+    }
+    Assume.assumeFalse("Unable to find method " + methodName, skip);
+    if (!skip) {
+      method.invoke(this, (Object[]) null);
+    }
   }
 }
diff --git a/pom.xml b/pom.xml
index ec62ba5..c14a6b5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -39,7 +39,8 @@
     <commons-io.version>2.5</commons-io.version>
     <commons-lang3.version>3.6</commons-lang3.version>
     <curator.version>4.0.0</curator.version>
-    <hadoop.version>3.2.0</hadoop.version>
+    <hadoop2.version>2.9.2</hadoop2.version>
+    <hadoop3.version>3.2.0</hadoop3.version>
     <hbase.version>2.1.4</hbase.version>
     <hbase-thirdparty.version>2.2.0</hbase-thirdparty.version>
     <junit.version>4.12</junit.version>
@@ -68,4 +69,51 @@
   <modules>
     <module>hbase-oss</module>
   </modules>
+
+  <profiles>
+    <profile>
+      <id>hadoop2</id>
+      <activation>
+        <property>
+          <name>hadoop.profile</name>
+          <value>2</value>
+        </property>
+      </activation>
+      <properties>
+        <hadoop.version>${hadoop2.version}</hadoop.version>
+      </properties>
+      <dependencies>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-hdfs</artifactId>
+          <version>${hadoop.version}</version>
+          <scope>test</scope>
+        </dependency>
+      </dependencies>
+    </profile>
+    <profile>
+      <id>hadoop3</id>
+      <activation>
+        <property>
+          <name>hadoop.profile</name>
+          <value>3</value>
+        </property>
+      </activation>
+      <properties>
+        <hadoop.version>${hadoop3.version}</hadoop.version>
+      </properties>
+    </profile>
+    <profile>
+      <id>hadoop-default</id>
+      <activation>
+        <property>
+          <name>!hadoop.profile</name>
+        </property>
+      </activation>
+      <properties>
+        <hadoop.version>${hadoop3.version}</hadoop.version>
+      </properties>
+    </profile>
+  </profiles>
+
 </project>
-- 
2.18.0
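
Postscript (illustrative only, not part of the patch): the reflection in
testMoveDirUnderParent() above avoids a direct call to a superclass method
that exists only in newer Hadoop test jars; a direct call would fail to
compile against older versions, while a reflective lookup defers the check
to runtime. A minimal compilable sketch of that technique, with hypothetical
class and method names:

    import java.lang.reflect.Method;

    class OptionalSuperCall {
      // Invokes a no-argument method by name if the class on the runtime
      // classpath defines it; otherwise returns quietly, mirroring a
      // skipped test.
      static void invokeIfPresent(Object target, String methodName)
          throws Exception {
        Method method;
        try {
          method = target.getClass().getMethod(methodName);
        } catch (NoSuchMethodException e) {
          return; // absent in this dependency version
        }
        method.invoke(target);
      }
    }

The patch additionally reports the absent-method case through
Assume.assumeFalse so that JUnit 4 runs record the test as skipped rather
than silently passing.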