diff --git a/.gitignore b/.gitignore index eb1fc96..2a82ba7 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,10 @@ target build +# Filesystem contract test options and credentials +auth-keys.xml +azure-auth-keys.xml + # External tool builders */.externalToolBuilders */maven-eclipse.xml @@ -22,12 +26,6 @@ build hadoop-common-project/hadoop-kms/downloads/ hadoop-hdfs-project/hadoop-hdfs/downloads hadoop-hdfs-project/hadoop-hdfs-httpfs/downloads -hadoop-common-project/hadoop-common/src/test/resources/contract-test-options.xml -hadoop-tools/hadoop-openstack/src/test/resources/contract-test-options.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/tla/yarnregistry.toolbox yarnregistry.pdf -hadoop-tools/hadoop-aws/src/test/resources/auth-keys.xml -hadoop-tools/hadoop-aws/src/test/resources/contract-test-options.xml -hadoop-tools/hadoop-azure/src/test/resources/azure-auth-keys.xml -hadoop-tools/hadoop-openstack/src/test/resources/auth-keys.xml patchprocess/ diff --git a/LICENSE.txt b/LICENSE.txt index 0e4b492..61ebbd6 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -345,6 +345,38 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +For hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/{fstatat|openat|unlinkat}.h: + +Copyright (c) 2012 The FreeBSD Foundation +All rights reserved. + +This software was developed by Pawel Jakub Dawidek under sponsorship from +the FreeBSD Foundation. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. 
+ +============= + The binary distribution of this product bundles binaries of leveldb (http://code.google.com/p/leveldb/), which is available under the following license: diff --git a/hadoop-assemblies/pom.xml b/hadoop-assemblies/pom.xml index 0ec1dc6..91501f6 100644 --- a/hadoop-assemblies/pom.xml +++ b/hadoop-assemblies/pom.xml @@ -23,12 +23,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../hadoop-project org.apache.hadoop hadoop-assemblies - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Assemblies Apache Hadoop Assemblies diff --git a/hadoop-build-tools/pom.xml b/hadoop-build-tools/pom.xml index 02b7862..71a80dc 100644 --- a/hadoop-build-tools/pom.xml +++ b/hadoop-build-tools/pom.xml @@ -18,7 +18,7 @@ hadoop-main org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 hadoop-build-tools diff --git a/hadoop-client/pom.xml b/hadoop-client/pom.xml index c8d7fa9..7e28e6a 100644 --- a/hadoop-client/pom.xml +++ b/hadoop-client/pom.xml @@ -18,12 +18,12 @@ org.apache.hadoop hadoop-project-dist - 2.8.0-SNAPSHOT + 2.8.0 ../hadoop-project-dist org.apache.hadoop hadoop-client - 2.8.0-SNAPSHOT + 2.8.0 jar Apache Hadoop Client @@ -100,7 +100,7 @@ org.apache.hadoop - hadoop-hdfs-client + hadoop-hdfs compile @@ -123,6 +123,30 @@ javax.servlet servlet-api + + io.netty + netty + + + io.netty + netty-all + + + xerces + xercesImpl + + + commons-daemon + commons-daemon + + + org.mortbay.jetty + jetty-util + + + org.fusesource.leveldbjni + leveldbjni-all + diff --git a/hadoop-common-project/hadoop-annotations/pom.xml b/hadoop-common-project/hadoop-annotations/pom.xml index c2bbc5f..dac090a 100644 --- a/hadoop-common-project/hadoop-annotations/pom.xml +++ b/hadoop-common-project/hadoop-annotations/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-annotations - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Annotations Apache Hadoop Annotations jar diff --git a/hadoop-common-project/hadoop-auth-examples/pom.xml b/hadoop-common-project/hadoop-auth-examples/pom.xml index 20f1012..d9a0f99 100644 --- a/hadoop-common-project/hadoop-auth-examples/pom.xml +++ b/hadoop-common-project/hadoop-auth-examples/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-auth-examples - 2.8.0-SNAPSHOT + 2.8.0 war Apache Hadoop Auth Examples diff --git a/hadoop-common-project/hadoop-auth/pom.xml b/hadoop-common-project/hadoop-auth/pom.xml index 9d99a05..3ebbc06 100644 --- a/hadoop-common-project/hadoop-auth/pom.xml +++ b/hadoop-common-project/hadoop-auth/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-auth - 2.8.0-SNAPSHOT + 2.8.0 jar Apache Hadoop Auth diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 42a2f21..4da088a 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project-dist - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project-dist org.apache.hadoop hadoop-common - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Common Apache Hadoop Common jar diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java index c144711..04946ad 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java +++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Client.java @@ -71,7 +71,6 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; -import static org.apache.hadoop.ipc.RpcConstants.CONNECTION_CONTEXT_CALL_ID; import static org.apache.hadoop.ipc.RpcConstants.PING_CALL_ID; /** A client for an IPC service. IPC calls take a single {@link Writable} as a @@ -1767,7 +1766,9 @@ public void close() throws Exception { } void setSaslClient(SaslRpcClient client) throws IOException { - setInputStream(client.getInputStream(in)); + // Wrap the input stream in a BufferedInputStream to fill the buffer + // before reading its length (HADOOP-14062). + setInputStream(new BufferedInputStream(client.getInputStream(in))); setOutputStream(client.getOutputStream(out)); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java index 21d3dd6..4b14059 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/AbstractDelegationTokenSecretManager.java @@ -21,7 +21,7 @@ import java.io.ByteArrayInputStream; import java.io.DataInputStream; import java.io.IOException; -import java.util.Arrays; +import java.security.MessageDigest; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -467,7 +467,7 @@ public synchronized String getTokenTrackingId(TokenIdent identifier) { public synchronized void verifyToken(TokenIdent identifier, byte[] password) throws InvalidToken { byte[] storedPassword = retrievePassword(identifier); - if (!Arrays.equals(password, storedPassword)) { + if (!MessageDigest.isEqual(password, storedPassword)) { throw new InvalidToken("token " + formatTokenId(identifier) + " is invalid, password doesn't match"); } @@ -516,7 +516,7 @@ public synchronized long renewToken(Token token, + id.getSequenceNumber()); } byte[] password = createPassword(token.getIdentifier(), key.getKey()); - if (!Arrays.equals(password, token.getPassword())) { + if (!MessageDigest.isEqual(password, token.getPassword())) { throw new AccessControlException(renewer + " is trying to renew a token " + formatTokenId(id) + " with wrong password"); diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 900cca0..b119bc7 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -827,6 +827,15 @@ + fs.s3a.security.credential.provider.path + + + Optional comma separated list of credential providers, a list + which is prepended to that set in hadoop.security.credential.provider.path + + + + fs.s3a.connection.maximum 15 Controls the maximum number of simultaneous connections to S3. 
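A minimal sketch, using only Hadoop's public Configuration API, of the "prepended" behaviour described in the new fs.s3a.security.credential.provider.path entry above: the S3A-specific provider list is placed ahead of the global hadoop.security.credential.provider.path list before a credential such as fs.s3a.secret.key is resolved. The provider URIs and the helper name below are illustrative assumptions, not values or code from this patch.

    import org.apache.hadoop.conf.Configuration;

    public class S3ACredentialPathExample {

      static final String S3A_PATH = "fs.s3a.security.credential.provider.path";
      static final String HADOOP_PATH = "hadoop.security.credential.provider.path";

      /** Return a copy of conf whose global provider path has the S3A list prepended. */
      static Configuration patchProviderPath(Configuration conf) {
        String s3a = conf.getTrimmed(S3A_PATH, "");
        String global = conf.getTrimmed(HADOOP_PATH, "");
        if (s3a.isEmpty()) {
          return conf;
        }
        Configuration patched = new Configuration(conf);
        patched.set(HADOOP_PATH, global.isEmpty() ? s3a : s3a + "," + global);
        return patched;
      }

      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set(S3A_PATH, "jceks://hdfs@nn.example.com/s3a.jceks");   // illustrative path
        conf.set(HADOOP_PATH, "jceks://file/tmp/common.jceks");        // illustrative path
        // getPassword consults the combined provider list, then falls back to
        // any clear-text config entry for backward compatibility.
        char[] secret = patchProviderPath(conf).getPassword("fs.s3a.secret.key");
        System.out.println(secret == null
            ? "no credential found"
            : "resolved fs.s3a.secret.key (" + secret.length + " chars)");
      }
    }

This mirrors the description text only; S3A's own wiring of the property lives elsewhere in hadoop-aws and is not shown in this patch.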
diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md b/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md index 66c25e5..a1c2307 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/ClusterSetup.md @@ -208,7 +208,7 @@ The following parameters can be used to control the node health monitoring scrip |:---- |:---- |:---- | | `yarn.nodemanager.health-checker.script.path` | Node health script | Script to check for node's health status. | | `yarn.nodemanager.health-checker.script.opts` | Node health script options | Options for script to check for node's health status. | -| `yarn.nodemanager.health-checker.script.interval-ms` | Node health script interval | Time interval for running health script. | +| `yarn.nodemanager.health-checker.interval-ms` | Node health script interval | Time interval for running health script. | | `yarn.nodemanager.health-checker.script.timeout-ms` | Node health script timeout interval | Timeout for health script execution. | The health checker script is not supposed to give ERROR if only some of the local disks become bad. NodeManager has the ability to periodically check the health of the local disks (specifically checks nodemanager-local-dirs and nodemanager-log-dirs) and after reaching the threshold of number of bad directories based on the value set for the config property yarn.nodemanager.disk-health-checker.min-healthy-disks, the whole node is marked unhealthy and this info is sent to resource manager also. The boot disk is either raided or a failure in the boot disk is identified by the health checker script. diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/CredentialProviderAPI.md b/hadoop-common-project/hadoop-common/src/site/markdown/CredentialProviderAPI.md index 209b48d..de871b1 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/CredentialProviderAPI.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/CredentialProviderAPI.md @@ -102,6 +102,7 @@ In summary, first, provision the credentials into a provider then configure the |YARN |WebAppUtils uptakes the use of the credential provider API through the new method on Configuration called getPassword. This provides an alternative to storing the passwords in clear text within the ssl-server.xml file while maintaining backward compatibility.|TODO| |AWS
S3/S3A |Uses Configuration.getPassword to get the S3 credentials. They may be resolved through the credential provider API or from the config for backward compatibility.|[AWS S3/S3A Usage](../../hadoop-aws/tools/hadoop-aws/index.html)| |Azure
WASB |Uses Configuration.getPassword to get the WASB credentials. They may be resolved through the credential provider API or from the config for backward compatibility.|[Azure WASB Usage](../../hadoop-azure/index.html)| +|Azure
ADLS |Uses Configuration.getPassword to get the ADLS credentials. They may be resolved through the credential provider API or from the config for backward compatibility.|[Azure ADLS Usage](../../hadoop-azure-datalake/index.html)| |Apache
Accumulo|The trace.password property is used by the Tracer to authenticate with Accumulo and persist the traces in the trace table. The credential provider API is used to acquire the trace.password from a provider or from configuration for backward compatibility.|TODO| |Apache
Slider |A capability has been added to Slider to prompt the user for needed passwords and store them using CredentialProvider so they can be retrieved by an app later.|TODO| |Apache
Hive |Protection of the metastore password, SSL related passwords and JDO string password has been added through the use of the Credential Provider API|TODO| diff --git a/hadoop-common-project/hadoop-kms/pom.xml b/hadoop-common-project/hadoop-kms/pom.xml index 1181dfb..dac856f 100644 --- a/hadoop-common-project/hadoop-kms/pom.xml +++ b/hadoop-common-project/hadoop-kms/pom.xml @@ -22,12 +22,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-kms - 2.8.0-SNAPSHOT + 2.8.0 war Apache Hadoop KMS diff --git a/hadoop-common-project/hadoop-minikdc/pom.xml b/hadoop-common-project/hadoop-minikdc/pom.xml index 5470a52..1920b3f 100644 --- a/hadoop-common-project/hadoop-minikdc/pom.xml +++ b/hadoop-common-project/hadoop-minikdc/pom.xml @@ -18,13 +18,13 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project 4.0.0 org.apache.hadoop hadoop-minikdc - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop MiniKDC Apache Hadoop MiniKDC jar diff --git a/hadoop-common-project/hadoop-nfs/pom.xml b/hadoop-common-project/hadoop-nfs/pom.xml index 932c23e..de787e2 100644 --- a/hadoop-common-project/hadoop-nfs/pom.xml +++ b/hadoop-common-project/hadoop-nfs/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-nfs - 2.8.0-SNAPSHOT + 2.8.0 jar Apache Hadoop NFS diff --git a/hadoop-common-project/pom.xml b/hadoop-common-project/pom.xml index dc6f150..fbcc215 100644 --- a/hadoop-common-project/pom.xml +++ b/hadoop-common-project/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../hadoop-project org.apache.hadoop hadoop-common-project - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Common Project Apache Hadoop Common Project pom diff --git a/hadoop-dist/pom.xml b/hadoop-dist/pom.xml index 2708ce0..7c164c5 100644 --- a/hadoop-dist/pom.xml +++ b/hadoop-dist/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../hadoop-project org.apache.hadoop hadoop-dist - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Distribution Apache Hadoop Distribution jar diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml index 0cf0d84..0f17629 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml @@ -20,12 +20,12 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project-dist - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project-dist org.apache.hadoop hadoop-hdfs-client - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop HDFS Client Apache Hadoop HDFS Client jar diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index 463ce23..873fb03 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -505,33 +505,36 @@ else if (offset >= locatedBlocks.getFileLength()) { } else { // search cached blocks first - int targetBlockIdx = locatedBlocks.findBlock(offset); - if (targetBlockIdx < 0) { // block is not cached - targetBlockIdx = LocatedBlocks.getInsertIndex(targetBlockIdx); - // fetch more blocks - final LocatedBlocks newBlocks = dfsClient.getLocatedBlocks(src, offset); - assert (newBlocks != null) : "Could not find target position " + 
offset; - locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks()); - } - blk = locatedBlocks.get(targetBlockIdx); + blk = fetchBlockAt(offset, 0, true); } return blk; } } /** Fetch a block from namenode and cache it */ - protected void fetchBlockAt(long offset) throws IOException { + protected LocatedBlock fetchBlockAt(long offset) throws IOException { + return fetchBlockAt(offset, 0, false); // don't use cache + } + + /** Fetch a block from namenode and cache it */ + private LocatedBlock fetchBlockAt(long offset, long length, boolean useCache) + throws IOException { synchronized(infoLock) { int targetBlockIdx = locatedBlocks.findBlock(offset); if (targetBlockIdx < 0) { // block is not cached targetBlockIdx = LocatedBlocks.getInsertIndex(targetBlockIdx); + useCache = false; } - // fetch blocks - final LocatedBlocks newBlocks = dfsClient.getLocatedBlocks(src, offset); - if (newBlocks == null) { - throw new IOException("Could not find target position " + offset); + if (!useCache) { // fetch blocks + final LocatedBlocks newBlocks = (length == 0) + ? dfsClient.getLocatedBlocks(src, offset) + : dfsClient.getLocatedBlocks(src, offset, length); + if (newBlocks == null || newBlocks.locatedBlockCount() == 0) { + throw new EOFException("Could not find target position " + offset); + } + locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks()); } - locatedBlocks.insertRange(targetBlockIdx, newBlocks.getLocatedBlocks()); + return locatedBlocks.get(targetBlockIdx); } } @@ -586,28 +589,15 @@ protected void fetchBlockAt(long offset) throws IOException { assert (locatedBlocks != null) : "locatedBlocks is null"; List blockRange = new ArrayList<>(); // search cached blocks first - int blockIdx = locatedBlocks.findBlock(offset); - if (blockIdx < 0) { // block is not cached - blockIdx = LocatedBlocks.getInsertIndex(blockIdx); - } long remaining = length; long curOff = offset; while(remaining > 0) { - LocatedBlock blk = null; - if(blockIdx < locatedBlocks.locatedBlockCount()) - blk = locatedBlocks.get(blockIdx); - if (blk == null || curOff < blk.getStartOffset()) { - LocatedBlocks newBlocks; - newBlocks = dfsClient.getLocatedBlocks(src, curOff, remaining); - locatedBlocks.insertRange(blockIdx, newBlocks.getLocatedBlocks()); - continue; - } + LocatedBlock blk = fetchBlockAt(curOff, remaining, true); assert curOff >= blk.getStartOffset() : "Block not found"; blockRange.add(blk); long bytesRead = blk.getStartOffset() + blk.getBlockSize() - curOff; remaining -= bytesRead; curOff += bytesRead; - blockIdx++; } return blockRange; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto index 451e2ab..7e2eb9e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/hdfs.proto @@ -88,6 +88,7 @@ message DatanodeInfoProto { optional uint64 lastUpdate = 6 [default = 0]; optional uint32 xceiverCount = 7 [default = 0]; optional string location = 8; + optional uint64 nonDfsUsed = 9; enum AdminState { NORMAL = 0; DECOMMISSION_INPROGRESS = 1; @@ -99,7 +100,6 @@ message DatanodeInfoProto { optional uint64 cacheUsed = 12 [default = 0]; optional uint64 lastUpdateMonotonic = 13 [default = 0]; optional string upgradeDomain = 14; - optional uint64 nonDfsUsed = 15; } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml index f48b439..74371f2 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml @@ -22,12 +22,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-hdfs-httpfs - 2.8.0-SNAPSHOT + 2.8.0 war Apache Hadoop HttpFS diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml index f4f9033..f7b9e3e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml @@ -20,12 +20,12 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project-dist - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project-dist org.apache.hadoop hadoop-hdfs-native-client - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop HDFS Native Client Apache Hadoop HDFS Native Client jar diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-nfs/pom.xml index b09905b..b20e6e7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/pom.xml @@ -20,12 +20,12 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-hdfs-nfs - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop HDFS-NFS Apache Hadoop HDFS-NFS jar diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index 824b0ca..ad8fa59 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -20,12 +20,12 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project-dist - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project-dist org.apache.hadoop hadoop-hdfs - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop HDFS Apache Hadoop HDFS jar @@ -184,11 +184,6 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> compile - com.twitter - hpack - compile - - xerces xercesImpl compile diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/pom.xml index 45e8e68..ab1329e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/contrib/bkjournal/pom.xml @@ -20,13 +20,13 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../../../../hadoop-project org.apache.hadoop.contrib hadoop-hdfs-bkjournal - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop HDFS BookKeeper Journal Apache Hadoop HDFS BookKeeper Journal jar diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/DatanodeHttpServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/DatanodeHttpServer.java index caee6cc..ad830f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/DatanodeHttpServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/DatanodeHttpServer.java @@ -26,8 +26,8 @@ import javax.servlet.ServletContext; import javax.servlet.ServletException; +import io.netty.bootstrap.ChannelFactory; import io.netty.bootstrap.ServerBootstrap; -import io.netty.channel.ChannelFactory; import io.netty.channel.ChannelFuture; import io.netty.channel.ChannelInitializer; import io.netty.channel.ChannelOption; @@ -144,8 +144,16 @@ public DatanodeHttpServer(final Configuration conf, .childHandler(new ChannelInitializer() { @Override protected void 
initChannel(SocketChannel ch) throws Exception { - ch.pipeline().addLast(new PortUnificationServerHandler(jettyAddr, - conf, confForCreate, restCsrfPreventionFilter)); + ChannelPipeline p = ch.pipeline(); + p.addLast(new HttpRequestDecoder(), + new HttpResponseEncoder()); + if (restCsrfPreventionFilter != null) { + p.addLast(new RestCsrfPreventionFilterHandler( + restCsrfPreventionFilter)); + } + p.addLast( + new ChunkedWriteHandler(), + new URLDispatcher(jettyAddr, conf, confForCreate)); } }); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/PortUnificationServerHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/PortUnificationServerHandler.java deleted file mode 100644 index ff10c6d..0000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/PortUnificationServerHandler.java +++ /dev/null @@ -1,99 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdfs.server.datanode.web; - -import java.net.InetSocketAddress; -import java.util.List; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.server.datanode.web.dtp.DtpHttp2Handler; -import org.apache.hadoop.security.http.RestCsrfPreventionFilter; - -import io.netty.buffer.ByteBuf; -import io.netty.buffer.ByteBufUtil; -import io.netty.channel.ChannelHandlerContext; -import io.netty.handler.codec.ByteToMessageDecoder; -import io.netty.handler.codec.http.HttpServerCodec; -import io.netty.handler.codec.http2.Http2CodecUtil; -import io.netty.handler.stream.ChunkedWriteHandler; - -/** - * A port unification handler to support HTTP/1.1 and HTTP/2 on the same port. - */ -@InterfaceAudience.Private -public class PortUnificationServerHandler extends ByteToMessageDecoder { - - private static final ByteBuf HTTP2_CLIENT_CONNECTION_PREFACE = Http2CodecUtil - .connectionPrefaceBuf(); - - // we only want to support HTTP/1.1 and HTTP/2, so the first 3 bytes is - // enough. 
No HTTP/1.1 request could start with "PRI" - private static final int MAGIC_HEADER_LENGTH = 3; - - private final InetSocketAddress proxyHost; - - private final Configuration conf; - - private final Configuration confForCreate; - - private final RestCsrfPreventionFilter restCsrfPreventionFilter; - - public PortUnificationServerHandler(InetSocketAddress proxyHost, - Configuration conf, Configuration confForCreate, - RestCsrfPreventionFilter restCsrfPreventionFilter) { - this.proxyHost = proxyHost; - this.conf = conf; - this.confForCreate = confForCreate; - this.restCsrfPreventionFilter = restCsrfPreventionFilter; - } - - private void configureHttp1(ChannelHandlerContext ctx) { - ctx.pipeline().addLast(new HttpServerCodec()); - if (this.restCsrfPreventionFilter != null) { - ctx.pipeline().addLast(new RestCsrfPreventionFilterHandler( - this.restCsrfPreventionFilter)); - } - ctx.pipeline().addLast(new ChunkedWriteHandler(), - new URLDispatcher(proxyHost, conf, confForCreate)); - } - - private void configureHttp2(ChannelHandlerContext ctx) { - if (this.restCsrfPreventionFilter != null) { - ctx.pipeline().addLast(new RestCsrfPreventionFilterHandler( - this.restCsrfPreventionFilter)); - } - ctx.pipeline().addLast(new DtpHttp2Handler()); - } - - @Override - protected void decode(ChannelHandlerContext ctx, ByteBuf in, - List out) throws Exception { - if (in.readableBytes() < MAGIC_HEADER_LENGTH) { - return; - } - if (ByteBufUtil.equals(in, 0, HTTP2_CLIENT_CONNECTION_PREFACE, 0, - MAGIC_HEADER_LENGTH)) { - configureHttp2(ctx); - } else { - configureHttp1(ctx); - } - ctx.pipeline().remove(this); - } - -} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/RestCsrfPreventionFilterHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/RestCsrfPreventionFilterHandler.java index f2f0533..4958bb5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/RestCsrfPreventionFilterHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/RestCsrfPreventionFilterHandler.java @@ -17,8 +17,8 @@ */ package org.apache.hadoop.hdfs.server.datanode.web; -import static io.netty.handler.codec.http.HttpHeaderNames.CONNECTION; -import static io.netty.handler.codec.http.HttpHeaderValues.CLOSE; +import static io.netty.handler.codec.http.HttpHeaders.Names.CONNECTION; +import static io.netty.handler.codec.http.HttpHeaders.Values.CLOSE; import static io.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR; import static io.netty.handler.codec.http.HttpVersion.HTTP_1_1; @@ -119,7 +119,7 @@ public String getHeader(String header) { @Override public String getMethod() { - return req.method().name(); + return req.getMethod().name(); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/SimpleHttpProxyHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/SimpleHttpProxyHandler.java index 6b0f013..ffa7681 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/SimpleHttpProxyHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/SimpleHttpProxyHandler.java @@ -17,9 +17,6 @@ */ package org.apache.hadoop.hdfs.server.datanode.web; -import static io.netty.handler.codec.http.HttpHeaderNames.CONNECTION; -import static 
io.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR; -import static io.netty.handler.codec.http.HttpVersion.HTTP_1_1; import io.netty.bootstrap.Bootstrap; import io.netty.buffer.Unpooled; import io.netty.channel.Channel; @@ -34,14 +31,17 @@ import io.netty.channel.socket.nio.NioSocketChannel; import io.netty.handler.codec.http.DefaultFullHttpRequest; import io.netty.handler.codec.http.DefaultHttpResponse; -import io.netty.handler.codec.http.HttpHeaderValues; import io.netty.handler.codec.http.HttpRequest; import io.netty.handler.codec.http.HttpRequestEncoder; import io.netty.handler.codec.http.HttpResponseEncoder; +import org.apache.commons.logging.Log; import java.net.InetSocketAddress; -import org.apache.commons.logging.Log; +import static io.netty.handler.codec.http.HttpHeaders.Names.CONNECTION; +import static io.netty.handler.codec.http.HttpHeaders.Values; +import static io.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR; +import static io.netty.handler.codec.http.HttpVersion.HTTP_1_1; /** * Dead simple session-layer HTTP proxy. It gets the HTTP responses @@ -98,7 +98,7 @@ public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) { @Override public void channelRead0 (final ChannelHandlerContext ctx, final HttpRequest req) { - uri = req.uri(); + uri = req.getUri(); final Channel client = ctx.channel(); Bootstrap proxiedServer = new Bootstrap() .group(client.eventLoop()) @@ -118,14 +118,14 @@ public void operationComplete(ChannelFuture future) throws Exception { if (future.isSuccess()) { ctx.channel().pipeline().remove(HttpResponseEncoder.class); HttpRequest newReq = new DefaultFullHttpRequest(HTTP_1_1, - req.method(), req.uri()); + req.getMethod(), req.getUri()); newReq.headers().add(req.headers()); - newReq.headers().set(CONNECTION, HttpHeaderValues.CLOSE); + newReq.headers().set(CONNECTION, Values.CLOSE); future.channel().writeAndFlush(newReq); } else { DefaultHttpResponse resp = new DefaultHttpResponse(HTTP_1_1, INTERNAL_SERVER_ERROR); - resp.headers().set(CONNECTION, HttpHeaderValues.CLOSE); + resp.headers().set(CONNECTION, Values.CLOSE); LOG.info("Proxy " + uri + " failed. 
Cause: ", future.cause()); ctx.writeAndFlush(resp).addListener(ChannelFutureListener.CLOSE); client.close(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/URLDispatcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/URLDispatcher.java index 7627d94..8ec5bf6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/URLDispatcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/URLDispatcher.java @@ -17,16 +17,16 @@ */ package org.apache.hadoop.hdfs.server.datanode.web; -import static org.apache.hadoop.hdfs.server.datanode.web.webhdfs.WebHdfsHandler.WEBHDFS_PREFIX; import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelPipeline; import io.netty.channel.SimpleChannelInboundHandler; import io.netty.handler.codec.http.HttpRequest; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.server.datanode.web.webhdfs.WebHdfsHandler; import java.net.InetSocketAddress; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.server.datanode.web.webhdfs.WebHdfsHandler; +import static org.apache.hadoop.hdfs.server.datanode.web.webhdfs.WebHdfsHandler.WEBHDFS_PREFIX; class URLDispatcher extends SimpleChannelInboundHandler { private final InetSocketAddress proxyHost; @@ -42,8 +42,8 @@ @Override protected void channelRead0(ChannelHandlerContext ctx, HttpRequest req) - throws Exception { - String uri = req.uri(); + throws Exception { + String uri = req.getUri(); ChannelPipeline p = ctx.pipeline(); if (uri.startsWith(WEBHDFS_PREFIX)) { WebHdfsHandler h = new WebHdfsHandler(conf, confForCreate); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/dtp/DtpHttp2FrameListener.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/dtp/DtpHttp2FrameListener.java deleted file mode 100644 index 41e7cf4..0000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/dtp/DtpHttp2FrameListener.java +++ /dev/null @@ -1,52 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hdfs.server.datanode.web.dtp; - -import io.netty.channel.ChannelHandlerContext; -import io.netty.handler.codec.http.HttpResponseStatus; -import io.netty.handler.codec.http2.DefaultHttp2Headers; -import io.netty.handler.codec.http2.Http2ConnectionEncoder; -import io.netty.handler.codec.http2.Http2Exception; -import io.netty.handler.codec.http2.Http2FrameAdapter; -import io.netty.handler.codec.http2.Http2Headers; - -import java.nio.charset.StandardCharsets; - -class DtpHttp2FrameListener extends Http2FrameAdapter { - - private Http2ConnectionEncoder encoder; - - public void encoder(Http2ConnectionEncoder encoder) { - this.encoder = encoder; - } - - @Override - public void onHeadersRead(ChannelHandlerContext ctx, int streamId, - Http2Headers headers, int streamDependency, short weight, - boolean exclusive, int padding, boolean endStream) throws Http2Exception { - encoder.writeHeaders(ctx, streamId, - new DefaultHttp2Headers().status(HttpResponseStatus.OK.codeAsText()), 0, - false, ctx.newPromise()); - encoder.writeData( - ctx, - streamId, - ctx.alloc().buffer() - .writeBytes("HTTP/2 DTP".getBytes(StandardCharsets.UTF_8)), 0, true, - ctx.newPromise()); - } -} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/ExceptionHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/ExceptionHandler.java index a6a8aa1..dce1f02 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/ExceptionHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/ExceptionHandler.java @@ -17,21 +17,12 @@ */ package org.apache.hadoop.hdfs.server.datanode.web.webhdfs; -import static io.netty.handler.codec.http.HttpHeaderNames.CONTENT_LENGTH; -import static io.netty.handler.codec.http.HttpHeaderNames.CONTENT_TYPE; -import static io.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST; -import static io.netty.handler.codec.http.HttpResponseStatus.FORBIDDEN; -import static io.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR; -import static io.netty.handler.codec.http.HttpResponseStatus.NOT_FOUND; -import static io.netty.handler.codec.http.HttpVersion.HTTP_1_1; -import static org.apache.hadoop.hdfs.server.datanode.web.webhdfs.WebHdfsHandler.APPLICATION_JSON_UTF8; +import com.google.common.base.Charsets; +import com.sun.jersey.api.ParamException; +import com.sun.jersey.api.container.ContainerException; import io.netty.buffer.Unpooled; import io.netty.handler.codec.http.DefaultFullHttpResponse; import io.netty.handler.codec.http.HttpResponseStatus; - -import java.io.FileNotFoundException; -import java.io.IOException; - import org.apache.commons.logging.Log; import org.apache.hadoop.hdfs.web.JsonUtil; import org.apache.hadoop.ipc.RemoteException; @@ -39,9 +30,17 @@ import org.apache.hadoop.security.authorize.AuthorizationException; import org.apache.hadoop.security.token.SecretManager; -import com.google.common.base.Charsets; -import com.sun.jersey.api.ParamException; -import com.sun.jersey.api.container.ContainerException; +import java.io.FileNotFoundException; +import java.io.IOException; + +import static io.netty.handler.codec.http.HttpHeaders.Names.CONTENT_LENGTH; +import static io.netty.handler.codec.http.HttpHeaders.Names.CONTENT_TYPE; +import static io.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST; +import static 
io.netty.handler.codec.http.HttpResponseStatus.FORBIDDEN; +import static io.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR; +import static io.netty.handler.codec.http.HttpResponseStatus.NOT_FOUND; +import static io.netty.handler.codec.http.HttpVersion.HTTP_1_1; +import static org.apache.hadoop.hdfs.server.datanode.web.webhdfs.WebHdfsHandler.APPLICATION_JSON_UTF8; class ExceptionHandler { static Log LOG = WebHdfsHandler.LOG; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/HdfsWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/HdfsWriter.java index 8de4bb2..b5654ab 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/HdfsWriter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/HdfsWriter.java @@ -17,21 +17,21 @@ */ package org.apache.hadoop.hdfs.server.datanode.web.webhdfs; -import static io.netty.handler.codec.http.HttpHeaderNames.CONNECTION; -import static io.netty.handler.codec.http.HttpHeaderValues.CLOSE; import io.netty.channel.ChannelFutureListener; import io.netty.channel.ChannelHandlerContext; import io.netty.channel.SimpleChannelInboundHandler; import io.netty.handler.codec.http.DefaultHttpResponse; import io.netty.handler.codec.http.HttpContent; import io.netty.handler.codec.http.LastHttpContent; +import org.apache.commons.logging.Log; +import org.apache.hadoop.hdfs.DFSClient; +import org.apache.hadoop.io.IOUtils; import java.io.IOException; import java.io.OutputStream; -import org.apache.commons.logging.Log; -import org.apache.hadoop.hdfs.DFSClient; -import org.apache.hadoop.io.IOUtils; +import static io.netty.handler.codec.http.HttpHeaders.Names.CONNECTION; +import static io.netty.handler.codec.http.HttpHeaders.Values.CLOSE; class HdfsWriter extends SimpleChannelInboundHandler { private final DFSClient client; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java index b0421f2..6125b0c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/webhdfs/WebHdfsHandler.java @@ -17,23 +17,7 @@ */ package org.apache.hadoop.hdfs.server.datanode.web.webhdfs; -import static io.netty.handler.codec.http.HttpHeaderNames.ACCESS_CONTROL_ALLOW_METHODS; -import static io.netty.handler.codec.http.HttpHeaderNames.ACCESS_CONTROL_ALLOW_ORIGIN; -import static io.netty.handler.codec.http.HttpHeaderNames.CONNECTION; -import static io.netty.handler.codec.http.HttpHeaderNames.CONTENT_LENGTH; -import static io.netty.handler.codec.http.HttpHeaderNames.CONTENT_TYPE; -import static io.netty.handler.codec.http.HttpHeaderNames.LOCATION; -import static io.netty.handler.codec.http.HttpHeaderValues.CLOSE; -import static io.netty.handler.codec.http.HttpMethod.GET; -import static io.netty.handler.codec.http.HttpMethod.POST; -import static io.netty.handler.codec.http.HttpMethod.PUT; -import static io.netty.handler.codec.http.HttpResponseStatus.CONTINUE; -import static io.netty.handler.codec.http.HttpResponseStatus.CREATED; -import static io.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR; 
-import static io.netty.handler.codec.http.HttpResponseStatus.OK; -import static io.netty.handler.codec.http.HttpVersion.HTTP_1_1; -import static org.apache.hadoop.hdfs.protocol.HdfsConstants.HDFS_URI_SCHEME; -import static org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier.HDFS_DELEGATION_KIND; +import com.google.common.base.Preconditions; import io.netty.buffer.Unpooled; import io.netty.channel.ChannelFutureListener; import io.netty.channel.ChannelHandlerContext; @@ -45,17 +29,6 @@ import io.netty.handler.codec.http.HttpRequest; import io.netty.handler.codec.http.QueryStringDecoder; import io.netty.handler.stream.ChunkedStream; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.InetSocketAddress; -import java.net.URI; -import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; -import java.security.PrivilegedExceptionAction; -import java.util.EnumSet; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; @@ -76,7 +49,38 @@ import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.LimitInputStream; -import com.google.common.base.Preconditions; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.InetSocketAddress; +import java.net.URI; +import java.net.URISyntaxException; +import java.nio.charset.StandardCharsets; +import java.security.PrivilegedExceptionAction; +import java.util.EnumSet; + +import static io.netty.handler.codec.http.HttpHeaders.Names.ACCEPT; +import static io.netty.handler.codec.http.HttpHeaders.Names.ACCESS_CONTROL_ALLOW_HEADERS; +import static io.netty.handler.codec.http.HttpHeaders.Names.ACCESS_CONTROL_ALLOW_METHODS; +import static io.netty.handler.codec.http.HttpHeaders.Names.ACCESS_CONTROL_ALLOW_ORIGIN; +import static io.netty.handler.codec.http.HttpHeaders.Names.ACCESS_CONTROL_MAX_AGE; +import static io.netty.handler.codec.http.HttpHeaders.Names.CONNECTION; +import static io.netty.handler.codec.http.HttpHeaders.Names.CONTENT_LENGTH; +import static io.netty.handler.codec.http.HttpHeaders.Names.CONTENT_TYPE; +import static io.netty.handler.codec.http.HttpHeaders.Names.LOCATION; +import static io.netty.handler.codec.http.HttpHeaders.Values.CLOSE; +import static io.netty.handler.codec.http.HttpHeaders.Values.KEEP_ALIVE; +import static io.netty.handler.codec.http.HttpMethod.GET; +import static io.netty.handler.codec.http.HttpMethod.OPTIONS; +import static io.netty.handler.codec.http.HttpMethod.POST; +import static io.netty.handler.codec.http.HttpMethod.PUT; +import static io.netty.handler.codec.http.HttpResponseStatus.CONTINUE; +import static io.netty.handler.codec.http.HttpResponseStatus.CREATED; +import static io.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR; +import static io.netty.handler.codec.http.HttpResponseStatus.OK; +import static io.netty.handler.codec.http.HttpVersion.HTTP_1_1; +import static org.apache.hadoop.hdfs.protocol.HdfsConstants.HDFS_URI_SCHEME; +import static org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier.HDFS_DELEGATION_KIND; public class WebHdfsHandler extends SimpleChannelInboundHandler { static final Log LOG = LogFactory.getLog(WebHdfsHandler.class); @@ -108,8 +112,8 @@ public WebHdfsHandler(Configuration conf, Configuration confForCreate) @Override public void channelRead0(final ChannelHandlerContext ctx, final HttpRequest req) throws Exception { - 
Preconditions.checkArgument(req.uri().startsWith(WEBHDFS_PREFIX)); - QueryStringDecoder queryString = new QueryStringDecoder(req.uri()); + Preconditions.checkArgument(req.getUri().startsWith(WEBHDFS_PREFIX)); + QueryStringDecoder queryString = new QueryStringDecoder(req.getUri()); params = new ParameterParser(queryString, conf); DataNodeUGIProvider ugiProvider = new DataNodeUGIProvider(params); ugi = ugiProvider.ugi(); @@ -130,7 +134,7 @@ public Void run() throws Exception { LOG.warn("Error retrieving hostname: ", e); host = "unknown"; } - REQLOG.info(host + " " + req.method() + " " + req.uri() + " " + + REQLOG.info(host + " " + req.getMethod() + " " + req.getUri() + " " + getResponseCode()); } return null; @@ -140,13 +144,13 @@ public Void run() throws Exception { int getResponseCode() { return (resp == null) ? INTERNAL_SERVER_ERROR.code() : - resp.status().code(); + resp.getStatus().code(); } public void handle(ChannelHandlerContext ctx, HttpRequest req) throws IOException, URISyntaxException { String op = params.op(); - HttpMethod method = req.method(); + HttpMethod method = req.getMethod(); if (PutOpParam.Op.CREATE.name().equalsIgnoreCase(op) && method == PUT) { onCreate(ctx); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageHandler.java index d1b61d1..2c50460 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageHandler.java @@ -17,19 +17,7 @@ */ package org.apache.hadoop.hdfs.tools.offlineImageViewer; -import static io.netty.handler.codec.http.HttpHeaderNames.CONNECTION; -import static io.netty.handler.codec.http.HttpHeaderNames.CONTENT_LENGTH; -import static io.netty.handler.codec.http.HttpHeaderNames.CONTENT_TYPE; -import static io.netty.handler.codec.http.HttpHeaderValues.CLOSE; -import static io.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST; -import static io.netty.handler.codec.http.HttpResponseStatus.FORBIDDEN; -import static io.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR; -import static io.netty.handler.codec.http.HttpResponseStatus.METHOD_NOT_ALLOWED; -import static io.netty.handler.codec.http.HttpResponseStatus.NOT_FOUND; -import static io.netty.handler.codec.http.HttpVersion.HTTP_1_1; -import static org.apache.hadoop.hdfs.server.datanode.web.webhdfs.WebHdfsHandler.APPLICATION_JSON_UTF8; -import static org.apache.hadoop.hdfs.server.datanode.web.webhdfs.WebHdfsHandler.WEBHDFS_PREFIX; -import static org.apache.hadoop.hdfs.server.datanode.web.webhdfs.WebHdfsHandler.WEBHDFS_PREFIX_LENGTH; +import com.google.common.base.Charsets; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.channel.ChannelFutureListener; @@ -42,18 +30,29 @@ import io.netty.handler.codec.http.HttpRequest; import io.netty.handler.codec.http.HttpResponseStatus; import io.netty.handler.codec.http.QueryStringDecoder; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hdfs.web.JsonUtil; +import org.apache.hadoop.util.StringUtils; import java.io.FileNotFoundException; import java.io.IOException; import java.util.List; import java.util.Map; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import 
org.apache.hadoop.hdfs.web.JsonUtil; -import org.apache.hadoop.util.StringUtils; - -import com.google.common.base.Charsets; +import static io.netty.handler.codec.http.HttpHeaders.Names.CONNECTION; +import static io.netty.handler.codec.http.HttpHeaders.Names.CONTENT_LENGTH; +import static io.netty.handler.codec.http.HttpHeaders.Names.CONTENT_TYPE; +import static io.netty.handler.codec.http.HttpHeaders.Values.CLOSE; +import static io.netty.handler.codec.http.HttpResponseStatus.BAD_REQUEST; +import static io.netty.handler.codec.http.HttpResponseStatus.FORBIDDEN; +import static io.netty.handler.codec.http.HttpResponseStatus.INTERNAL_SERVER_ERROR; +import static io.netty.handler.codec.http.HttpResponseStatus.METHOD_NOT_ALLOWED; +import static io.netty.handler.codec.http.HttpResponseStatus.NOT_FOUND; +import static io.netty.handler.codec.http.HttpVersion.HTTP_1_1; +import static org.apache.hadoop.hdfs.server.datanode.web.webhdfs.WebHdfsHandler.APPLICATION_JSON_UTF8; +import static org.apache.hadoop.hdfs.server.datanode.web.webhdfs.WebHdfsHandler.WEBHDFS_PREFIX; +import static org.apache.hadoop.hdfs.server.datanode.web.webhdfs.WebHdfsHandler.WEBHDFS_PREFIX_LENGTH; /** * Implement the read-only WebHDFS API for fsimage. @@ -76,7 +75,7 @@ public void channelActive(ChannelHandlerContext ctx) throws Exception { @Override public void channelRead0(ChannelHandlerContext ctx, HttpRequest request) throws Exception { - if (request.method() != HttpMethod.GET) { + if (request.getMethod() != HttpMethod.GET) { DefaultHttpResponse resp = new DefaultHttpResponse(HTTP_1_1, METHOD_NOT_ALLOWED); resp.headers().set(CONNECTION, CLOSE); @@ -84,7 +83,7 @@ public void channelRead0(ChannelHandlerContext ctx, HttpRequest request) return; } - QueryStringDecoder decoder = new QueryStringDecoder(request.uri()); + QueryStringDecoder decoder = new QueryStringDecoder(request.getUri()); final String op = getOp(decoder); final String content; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java index cc0fb92..b1858e2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestPread.java @@ -21,6 +21,7 @@ import static org.junit.Assert.assertTrue; import java.io.DataOutputStream; +import java.io.EOFException; import java.io.IOException; import java.util.ArrayList; import java.util.Random; @@ -29,7 +30,7 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; - +import java.util.concurrent.TimeUnit; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.FSDataInputStream; @@ -43,6 +44,7 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.mockito.Mockito; @@ -494,6 +496,54 @@ public void testPreadLocalFS() throws IOException { } } + @Test + public void testTruncateWhileReading() throws Exception { + Path path = new Path("/testfile"); + final int blockSize = 512; + + // prevent initial pre-fetch of multiple block locations + Configuration conf = new Configuration(); + conf.setLong(HdfsClientConfigKeys.Read.PREFETCH_SIZE_KEY, blockSize); + + MiniDFSCluster cluster = + new 
MiniDFSCluster.Builder(conf).numDataNodes(1).build(); + try { + DistributedFileSystem fs = cluster.getFileSystem(); + // create multi-block file + FSDataOutputStream dos = + fs.create(path, true, blockSize, (short)1, blockSize); + dos.write(new byte[blockSize*3]); + dos.close(); + // truncate a file while it's open + final FSDataInputStream dis = fs.open(path); + while (!fs.truncate(path, 10)) { + Thread.sleep(10); + } + // verify that reading bytes outside the initial pre-fetch do + // not send the client into an infinite loop querying locations. + ExecutorService executor = Executors.newFixedThreadPool(1); + Future future = executor.submit(new Callable() { + @Override + public Void call() throws IOException { + // read from 2nd block. + dis.readFully(blockSize, new byte[4]); + return null; + } + }); + try { + future.get(4, TimeUnit.SECONDS); + Assert.fail(); + } catch (ExecutionException ee) { + assertTrue(ee.toString(), ee.getCause() instanceof EOFException); + } finally { + future.cancel(true); + executor.shutdown(); + } + } finally { + cluster.shutdown(); + } + } + public static void main(String[] args) throws Exception { new TestPread().testPreadDFS(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/web/dtp/Http2ResponseHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/web/dtp/Http2ResponseHandler.java deleted file mode 100644 index eb8b918..0000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/web/dtp/Http2ResponseHandler.java +++ /dev/null @@ -1,65 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdfs.server.datanode.web.dtp; - -import io.netty.channel.ChannelHandlerContext; -import io.netty.channel.SimpleChannelInboundHandler; -import io.netty.handler.codec.http.FullHttpResponse; -import io.netty.handler.codec.http2.HttpUtil; -import io.netty.util.concurrent.Promise; - -import java.util.HashMap; -import java.util.Map; - -public class Http2ResponseHandler extends - SimpleChannelInboundHandler { - - private Map> streamId2Promise = - new HashMap<>(); - - @Override - protected void channelRead0(ChannelHandlerContext ctx, FullHttpResponse msg) - throws Exception { - Integer streamId = - msg.headers().getInt(HttpUtil.ExtensionHeaderNames.STREAM_ID.text()); - if (streamId == null) { - System.err.println("HttpResponseHandler unexpected message received: " - + msg); - return; - } - if (streamId.intValue() == 1) { - // this is the upgrade response message, just ignore it. 
- return; - } - Promise promise; - synchronized (this) { - promise = streamId2Promise.get(streamId); - } - if (promise == null) { - System.err.println("Message received for unknown stream id " + streamId); - } else { - // Do stuff with the message (for now just print it) - promise.setSuccess(msg.retain()); - - } - } - - public void put(Integer streamId, Promise promise) { - streamId2Promise.put(streamId, promise); - } -} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/web/dtp/TestDtpHttp2.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/web/dtp/TestDtpHttp2.java deleted file mode 100644 index 4e91004..0000000 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/web/dtp/TestDtpHttp2.java +++ /dev/null @@ -1,147 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hdfs.server.datanode.web.dtp; - -import static org.junit.Assert.assertEquals; -import io.netty.bootstrap.Bootstrap; -import io.netty.buffer.ByteBuf; -import io.netty.channel.Channel; -import io.netty.channel.ChannelInitializer; -import io.netty.channel.EventLoopGroup; -import io.netty.channel.nio.NioEventLoopGroup; -import io.netty.channel.socket.nio.NioSocketChannel; -import io.netty.handler.codec.http.DefaultFullHttpRequest; -import io.netty.handler.codec.http.FullHttpRequest; -import io.netty.handler.codec.http.FullHttpResponse; -import io.netty.handler.codec.http.HttpMethod; -import io.netty.handler.codec.http.HttpResponseStatus; -import io.netty.handler.codec.http.HttpVersion; -import io.netty.handler.codec.http2.DefaultHttp2Connection; -import io.netty.handler.codec.http2.DefaultHttp2FrameReader; -import io.netty.handler.codec.http2.DefaultHttp2FrameWriter; -import io.netty.handler.codec.http2.DelegatingDecompressorFrameListener; -import io.netty.handler.codec.http2.Http2Connection; -import io.netty.handler.codec.http2.Http2ConnectionHandler; -import io.netty.handler.codec.http2.Http2FrameLogger; -import io.netty.handler.codec.http2.Http2FrameReader; -import io.netty.handler.codec.http2.Http2FrameWriter; -import io.netty.handler.codec.http2.Http2InboundFrameLogger; -import io.netty.handler.codec.http2.Http2OutboundFrameLogger; -import io.netty.handler.codec.http2.HttpToHttp2ConnectionHandler; -import io.netty.handler.codec.http2.HttpUtil; -import io.netty.handler.codec.http2.InboundHttp2ToHttpAdapter; -import io.netty.handler.logging.LogLevel; -import io.netty.handler.timeout.TimeoutException; -import io.netty.util.concurrent.Promise; - -import java.io.IOException; -import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; -import 
java.util.concurrent.ExecutionException; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hdfs.MiniDFSCluster; -import org.apache.hadoop.hdfs.web.WebHdfsTestUtil; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; - -public class TestDtpHttp2 { - - private static final Http2FrameLogger FRAME_LOGGER = new Http2FrameLogger( - LogLevel.INFO, TestDtpHttp2.class); - - private static final Configuration CONF = WebHdfsTestUtil.createConf(); - - private static MiniDFSCluster CLUSTER; - - private static final EventLoopGroup WORKER_GROUP = new NioEventLoopGroup(); - - private static Channel CHANNEL; - - private static Http2ResponseHandler RESPONSE_HANDLER; - - @BeforeClass - public static void setUp() throws IOException, URISyntaxException, - TimeoutException { - CLUSTER = new MiniDFSCluster.Builder(CONF).numDataNodes(1).build(); - CLUSTER.waitActive(); - - RESPONSE_HANDLER = new Http2ResponseHandler(); - Bootstrap bootstrap = - new Bootstrap() - .group(WORKER_GROUP) - .channel(NioSocketChannel.class) - .remoteAddress("127.0.0.1", - CLUSTER.getDataNodes().get(0).getInfoPort()) - .handler(new ChannelInitializer() { - - @Override - protected void initChannel(Channel ch) throws Exception { - Http2Connection connection = new DefaultHttp2Connection(false); - Http2ConnectionHandler connectionHandler = - new HttpToHttp2ConnectionHandler(connection, frameReader(), - frameWriter(), new DelegatingDecompressorFrameListener( - connection, new InboundHttp2ToHttpAdapter.Builder( - connection).maxContentLength(Integer.MAX_VALUE) - .propagateSettings(true).build())); - ch.pipeline().addLast(connectionHandler, RESPONSE_HANDLER); - } - }); - CHANNEL = bootstrap.connect().syncUninterruptibly().channel(); - - } - - @AfterClass - public static void tearDown() throws IOException { - if (CHANNEL != null) { - CHANNEL.close().syncUninterruptibly(); - } - WORKER_GROUP.shutdownGracefully(); - if (CLUSTER != null) { - CLUSTER.shutdown(); - } - } - - private static Http2FrameReader frameReader() { - return new Http2InboundFrameLogger(new DefaultHttp2FrameReader(), - FRAME_LOGGER); - } - - private static Http2FrameWriter frameWriter() { - return new Http2OutboundFrameLogger(new DefaultHttp2FrameWriter(), - FRAME_LOGGER); - } - - @Test - public void test() throws InterruptedException, ExecutionException { - int streamId = 3; - FullHttpRequest request = - new DefaultFullHttpRequest(HttpVersion.HTTP_1_1, HttpMethod.GET, "/"); - request.headers().add(HttpUtil.ExtensionHeaderNames.STREAM_ID.text(), - streamId); - Promise promise = CHANNEL.eventLoop().newPromise(); - synchronized (RESPONSE_HANDLER) { - CHANNEL.writeAndFlush(request); - RESPONSE_HANDLER.put(streamId, promise); - } - assertEquals(HttpResponseStatus.OK, promise.get().status()); - ByteBuf content = promise.get().content(); - assertEquals("HTTP/2 DTP", content.toString(StandardCharsets.UTF_8)); - } -} diff --git a/hadoop-hdfs-project/pom.xml b/hadoop-hdfs-project/pom.xml index 566d509..b96a0c8 100644 --- a/hadoop-hdfs-project/pom.xml +++ b/hadoop-hdfs-project/pom.xml @@ -20,12 +20,12 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../hadoop-project org.apache.hadoop hadoop-hdfs-project - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop HDFS Project Apache Hadoop HDFS Project pom diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml index 
0a5343a..15be52d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml @@ -19,12 +19,12 @@ hadoop-mapreduce-client org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-mapreduce-client-app - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop MapReduce App diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml index 276332f..20f1ab1 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml @@ -19,12 +19,12 @@ hadoop-mapreduce-client org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-mapreduce-client-common - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop MapReduce Common diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml index 58845cd..ea28a4a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml @@ -19,12 +19,12 @@ hadoop-mapreduce-client org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-mapreduce-client-core - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop MapReduce Core diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/ClientDistributedCacheManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/ClientDistributedCacheManager.java index c15e647..9672f31 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/ClientDistributedCacheManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/filecache/ClientDistributedCacheManager.java @@ -275,10 +275,21 @@ private static boolean checkPermissionOfOther(FileSystem fs, Path path, FsAction action, Map statCache) throws IOException { FileStatus status = getFileStatus(fs, path.toUri(), statCache); FsPermission perms = status.getPermission(); - FsAction otherAction = perms.getOtherAction(); - if (otherAction.implies(action)) { - return true; + + // Encrypted files are always treated as private. This stance has two + // important side effects. The first is that the encrypted files will be + // downloaded as the job owner instead of the YARN user, which is required + // for the KMS ACLs to work as expected. Second, it prevent a file with + // world readable permissions that is stored in an encryption zone from + // being localized as a publicly shared file with world readable + // permissions. 
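For clarity, the rule the remainder of this hunk enforces can be stated as a standalone sketch. This is illustrative only, not the patched method; it assumes the same FsPermission#getEncryptedBit() and FsAction#implies() calls the hunk itself uses.

    // Sketch: public-visibility check that treats encrypted files as private.
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.permission.FsAction;
    import org.apache.hadoop.fs.permission.FsPermission;

    final class PublicVisibilityCheck {
      private PublicVisibilityCheck() {
      }

      /** True only when "other" grants the action and the file is not encrypted. */
      static boolean isActionPermittedForOther(FileStatus status, FsAction action) {
        FsPermission perms = status.getPermission();
        if (perms.getEncryptedBit()) {
          // files in an encryption zone are always localized as private resources
          return false;
        }
        return perms.getOtherAction().implies(action);
      }
    }
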
+ if (!perms.getEncryptedBit()) { + FsAction otherAction = perms.getOtherAction(); + if (otherAction.implies(action)) { + return true; + } } + return false; } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/pom.xml index ebafbcd..15a0320 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/pom.xml @@ -19,12 +19,12 @@ hadoop-mapreduce-client org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-mapreduce-client-hs-plugins - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop MapReduce HistoryServer Plugins diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml index c164c78..69c4862 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml @@ -19,12 +19,12 @@ hadoop-mapreduce-client org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-mapreduce-client-hs - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop MapReduce HistoryServer diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml index f698113..245703d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml @@ -19,12 +19,12 @@ hadoop-mapreduce-client org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-mapreduce-client-jobclient - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop MapReduce JobClient diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java index 68a8861..3927755 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/main/java/org/apache/hadoop/mapred/ResourceMgrDelegate.java @@ -466,19 +466,19 @@ public ReservationListResponse listReservations( return client.listReservations(request); } @Override - public Map> getNodeToLabels() throws YarnException, + public Map> getNodeToLabels() throws YarnException, IOException { return client.getNodeToLabels(); } @Override - public Map> getLabelsToNodes() throws YarnException, + public Map> getLabelsToNodes() throws YarnException, IOException { return client.getLabelsToNodes(); } @Override - public Map> getLabelsToNodes(Set labels) + public Map> getLabelsToNodes(Set labels) throws YarnException, IOException { return client.getLabelsToNodes(labels); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/pom.xml index c6001b9..08a2156 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/pom.xml @@ -19,12 +19,12 @@ hadoop-mapreduce-client org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-mapreduce-client-shuffle - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop MapReduce Shuffle diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml index a6738f9..acbe3a2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-mapreduce-client - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop MapReduce Client pom diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml index 3d36378..8d8e745 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-mapreduce-examples - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop MapReduce Examples Apache Hadoop MapReduce Examples jar diff --git a/hadoop-mapreduce-project/pom.xml b/hadoop-mapreduce-project/pom.xml index 80bc46e..26b9631 100644 --- a/hadoop-mapreduce-project/pom.xml +++ b/hadoop-mapreduce-project/pom.xml @@ -18,12 +18,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../hadoop-project org.apache.hadoop hadoop-mapreduce - 2.8.0-SNAPSHOT + 2.8.0 pom Apache Hadoop MapReduce http://hadoop.apache.org/mapreduce/ diff --git a/hadoop-maven-plugins/pom.xml b/hadoop-maven-plugins/pom.xml index 2bc3e5b..87774de 100644 --- a/hadoop-maven-plugins/pom.xml +++ b/hadoop-maven-plugins/pom.xml @@ -19,7 +19,7 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../hadoop-project org.apache.hadoop diff --git a/hadoop-minicluster/pom.xml b/hadoop-minicluster/pom.xml index 553169f..6fb45da 100644 --- a/hadoop-minicluster/pom.xml +++ b/hadoop-minicluster/pom.xml @@ -18,12 +18,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../hadoop-project org.apache.hadoop hadoop-minicluster - 2.8.0-SNAPSHOT + 2.8.0 jar Apache Hadoop Mini-Cluster diff --git a/hadoop-project-dist/pom.xml b/hadoop-project-dist/pom.xml index edc6950..356d6f0 100644 --- a/hadoop-project-dist/pom.xml +++ b/hadoop-project-dist/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../hadoop-project org.apache.hadoop hadoop-project-dist - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Project Dist POM Apache Hadoop Project Dist POM pom diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index d5c249c..979d323 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-main - 2.8.0-SNAPSHOT + 2.8.0 org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Project POM Apache Hadoop Project POM pom @@ -612,13 +612,7 @@ io.netty netty-all - 4.1.0.Beta5 - - - - com.twitter - hpack - 0.11.0 + 4.0.23.Final diff --git a/hadoop-project/src/site/markdown/index.md.vm b/hadoop-project/src/site/markdown/index.md.vm index 1cbc181..5c640b8 100644 --- a/hadoop-project/src/site/markdown/index.md.vm +++ b/hadoop-project/src/site/markdown/index.md.vm @@ -16,47 +16,75 @@ Apache 
Hadoop ${project.version} ================================ Apache Hadoop ${project.version} is a minor release in the 2.x.y release -line, building upon the previous stable release 2.4.1. +line, building upon the previous stable release 2.7.3. Here is a short overview of the major features and improvements. * Common - * Authentication improvements when using an HTTP proxy server. This is - useful when accessing WebHDFS via a proxy server. + * Support async call retry and failover which can be used in async DFS + implementation with retry effort. - * A new Hadoop metrics sink that allows writing directly to Graphite. + * Cross Frame Scripting (XFS) prevention for UIs can be provided through + a common servlet filter. - * [Specification work](./hadoop-project-dist/hadoop-common/filesystem/index.html) - related to the Hadoop Compatible Filesystem (HCFS) effort. + * S3A improvements: add ability to plug in any AWSCredentialsProvider, + support read s3a credentials from hadoop credential provider API in + addition to XML configuraiton files, support Amazon STS temporary + credentials + + * WASB improvements: adding append API support + + * Build enhancements: replace dev-support with wrappers to Yetus, + provide a docker based solution to setup a build environment, + remove CHANGES.txt and rework the change log and release notes. + + * Add posixGroups support for LDAP groups mapping service. + + * Support integration with Azure Data Lake (ADL) as an alternative + Hadoop-compatible file system. * HDFS - * Support for POSIX-style filesystem extended attributes. See the - [user documentation](./hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html) - for more details. + * WebHDFS enhancements: integrate CSRF prevention filter in WebHDFS, + support OAuth2 in WebHDFS, disallow/allow snapshots via WebHDFS + + * Allow long-running Balancer to login with keytab + + * Add ReverseXML processor which reconstructs an fsimage from an XML file. + This will make it easy to create fsimages for testing, and manually edit + fsimages when there is corruption - * Using the OfflineImageViewer, clients can now browse an fsimage via - the WebHDFS API. + * Support nested encryption zones - * The NFS gateway received a number of supportability improvements and - bug fixes. The Hadoop portmapper is no longer required to run the gateway, - and the gateway is now able to reject connections from unprivileged ports. + * DataNode Lifeline Protocol: an alternative protocol for reporting DataNode + liveness. This can prevent the NameNode from incorrectly marking DataNodes + as stale or dead in highly overloaded clusters where heartbeat processing + is suffering delays. - * The SecondaryNameNode, JournalNode, and DataNode web UIs have been - modernized with HTML5 and Javascript. + * Logging HDFS operation's caller context into audit logs + + * A new Datanode command for evicting writers which is useful when data node + decommissioning is blocked by slow writers. * YARN - * YARN's REST APIs now support write/modify operations. Users can submit and - kill applications through REST APIs. + * NodeManager CPU resource monitoring in Windows. + + * NM shutdown more graceful: NM will unregister to RM immediately rather than + waiting for timeout to be LOST (if NM work preserving is not enabled). + + * Add ability to fail a specific AM attempt in scenario of AM attempt get stuck. + + * CallerContext support in YARN audit log. + + * ATS versioning support: a new configuration to indicate timeline service version. 
+ +* MAPREDUCE - * The timeline store in YARN, used for storing generic and - application-specific information for applications, supports authentication - through Kerberos. + * Allow node labels get specificed in submitting MR jobs - * The Fair Scheduler supports dynamic hierarchical user queues, user queues - are created dynamically at runtime under any specified parent-queue. + * Add a new tool to combine aggregated logs into HAR files Getting Started =============== diff --git a/hadoop-tools/hadoop-ant/pom.xml b/hadoop-tools/hadoop-ant/pom.xml index 03a829c..afb3253 100644 --- a/hadoop-tools/hadoop-ant/pom.xml +++ b/hadoop-tools/hadoop-ant/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-ant - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Ant Tasks Apache Hadoop Ant Tasks jar diff --git a/hadoop-tools/hadoop-archive-logs/pom.xml b/hadoop-tools/hadoop-archive-logs/pom.xml index 6ded278..799429a 100644 --- a/hadoop-tools/hadoop-archive-logs/pom.xml +++ b/hadoop-tools/hadoop-archive-logs/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-archive-logs - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Archive Logs Apache Hadoop Archive Logs jar diff --git a/hadoop-tools/hadoop-archives/pom.xml b/hadoop-tools/hadoop-archives/pom.xml index 131d73c..b8e4e9d 100644 --- a/hadoop-tools/hadoop-archives/pom.xml +++ b/hadoop-tools/hadoop-archives/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-archives - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Archives Apache Hadoop Archives jar diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index 1a42b64..dff70f8 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -19,11 +19,11 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project hadoop-aws - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Amazon Web Services support This module contains code to support integration with Amazon Web Services. 
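The S3A credential-provider support called out in the release summary above can be exercised from client code roughly as follows. This is a hedged sketch: the fs.s3a.access.key / fs.s3a.secret.key property names are the standard S3A keys, but the provider path URI shown is a placeholder, and Configuration#getPassword simply falls back to plain XML values when no provider is configured.

    // Sketch: resolving S3A credentials through the Hadoop credential provider API.
    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;

    public class S3ACredentialLookup {
      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // hypothetical provider path; point this at a real JCEKS keystore in practice
        conf.set("hadoop.security.credential.provider.path",
            "jceks://hdfs@nn1.example.com/user/alice/s3a.jceks");
        char[] accessKey = conf.getPassword("fs.s3a.access.key");
        char[] secretKey = conf.getPassword("fs.s3a.secret.key");
        System.out.println("access key resolved: " + (accessKey != null));
        System.out.println("secret key resolved: " + (secretKey != null));
      }
    }
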
@@ -183,7 +183,7 @@ **/ITestJets3tNativeS3FileSystemContract.java **/ITest*Root*.java **/ITestS3AFileContextStatistics.java - **/ITestS3AHuge*.java + **/ITestS3AHuge*.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/BlockingThreadPoolExecutorService.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/BlockingThreadPoolExecutorService.java index eb40c3a..f13942d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/BlockingThreadPoolExecutorService.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/BlockingThreadPoolExecutorService.java @@ -86,7 +86,7 @@ public Thread newThread(Runnable r) { * @return a thread factory that creates named, daemon threads with * the supplied exception handler and normal priority */ - private static ThreadFactory newDaemonThreadFactory(final String prefix) { + static ThreadFactory newDaemonThreadFactory(final String prefix) { final ThreadFactory namedFactory = getNamedThreadFactory(prefix); return new ThreadFactory() { @Override diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index d2f0b90..6496268 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -48,6 +48,14 @@ private Constants() { public static final String AWS_CREDENTIALS_PROVIDER = "fs.s3a.aws.credentials.provider"; + /** + * Extra set of security credentials which will be prepended to that + * set in {@code "hadoop.security.credential.provider.path"}. + * This extra option allows for per-bucket overrides. + */ + public static final String S3A_SECURITY_CREDENTIAL_PROVIDER_PATH = + "fs.s3a.security.credential.provider.path"; + // session token for when using TemporaryAWSCredentialsProvider public static final String SESSION_TOKEN = "fs.s3a.session.token"; @@ -231,6 +239,12 @@ private Constants() { public static final String FS_S3A_BLOCK_SIZE = "fs.s3a.block.size"; public static final String FS_S3A = "s3a"; + /** Prefix for all S3A properties: {@value}. */ + public static final String FS_S3A_PREFIX = "fs.s3a."; + + /** Prefix for S3A bucket-specific properties: {@value}. 
*/ + public static final String FS_S3A_BUCKET_PREFIX = "fs.s3a.bucket."; + public static final int S3A_DEFAULT_PORT = -1; public static final String USER_AGENT_PREFIX = "fs.s3a.user.agent.prefix"; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java index b66a23f..e12600c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java @@ -19,7 +19,6 @@ package org.apache.hadoop.fs.s3a; import java.io.IOException; -import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.List; @@ -48,7 +47,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.util.Progressable; @@ -178,10 +176,10 @@ if (activeBlock == null) { blockCount++; if (blockCount>= Constants.MAX_MULTIPART_COUNT) { - LOG.error("Number of partitions in stream exceeds limit for S3: " + + LOG.error("Number of partitions in stream exceeds limit for S3: " + Constants.MAX_MULTIPART_COUNT + " write may fail."); } - activeBlock = blockFactory.create(this.blockSize); + activeBlock = blockFactory.create(blockCount, this.blockSize, statistics); } return activeBlock; } @@ -206,7 +204,9 @@ private synchronized boolean hasActiveBlock() { * Clear the active block. */ private void clearActiveBlock() { - LOG.debug("Clearing active block"); + if (activeBlock != null) { + LOG.debug("Clearing active block"); + } synchronized (this) { activeBlock = null; } @@ -356,11 +356,9 @@ public void close() throws IOException { writeOperationHelper.writeFailed(ioe); throw ioe; } finally { - LOG.debug("Closing block and factory"); - IOUtils.closeStream(block); - IOUtils.closeStream(blockFactory); + closeAll(LOG, block, blockFactory); LOG.debug("Statistics: {}", statistics); - IOUtils.closeStream(statistics); + closeAll(LOG, statistics); clearActiveBlock(); } // All end of write operations, including deleting fake parent directories @@ -378,10 +376,10 @@ private void putObject() throws IOException { final S3ADataBlocks.DataBlock block = getActiveBlock(); int size = block.dataSize(); - final PutObjectRequest putObjectRequest = - writeOperationHelper.newPutRequest( - block.startUpload(), - size); + final S3ADataBlocks.BlockUploadData uploadData = block.startUpload(); + final PutObjectRequest putObjectRequest = uploadData.hasFile() ? + writeOperationHelper.newPutRequest(uploadData.getFile()) + : writeOperationHelper.newPutRequest(uploadData.getUploadStream(), size); long transferQueueTime = now(); BlockUploadProgress callback = new BlockUploadProgress( @@ -392,8 +390,14 @@ private void putObject() throws IOException { executorService.submit(new Callable() { @Override public PutObjectResult call() throws Exception { - PutObjectResult result = fs.putObjectDirect(putObjectRequest); - block.close(); + PutObjectResult result; + try { + // the putObject call automatically closes the input + // stream afterwards. + result = writeOperationHelper.putObject(putObjectRequest); + } finally { + closeAll(LOG, uploadData, block); + } return result; } }); @@ -437,13 +441,21 @@ private long now() { } /** + * Get the statistics for this stream. 
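The putObject() change above selects between a file-backed and a stream-backed PUT depending on what the block produced. A reduced sketch of that branch is shown below; it is an assumption-level illustration using the public AWS SDK request constructors, with bucket, key and metadata values as placeholders rather than the actual WriteOperationHelper code.

    // Sketch: build the PUT request from whichever data source the block exposes.
    import java.io.File;
    import java.io.InputStream;
    import com.amazonaws.services.s3.model.ObjectMetadata;
    import com.amazonaws.services.s3.model.PutObjectRequest;

    final class PutRequestSketch {
      static PutObjectRequest newRequest(String bucket, String key,
          File sourceFile, InputStream sourceStream, long size) {
        if (sourceFile != null) {
          // file-backed uploads can be re-read by the SDK on retry
          return new PutObjectRequest(bucket, key, sourceFile);
        }
        ObjectMetadata metadata = new ObjectMetadata();
        metadata.setContentLength(size);
        // stream-backed uploads must declare their length up front
        return new PutObjectRequest(bucket, key, sourceStream, metadata);
      }
    }
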
+ * @return stream statistics + */ + S3AInstrumentation.OutputStreamStatistics getStatistics() { + return statistics; + } + + /** * Multiple partition upload. */ private class MultiPartUpload { private final String uploadId; private final List> partETagsFutures; - public MultiPartUpload() throws IOException { + MultiPartUpload() throws IOException { this.uploadId = writeOperationHelper.initiateMultiPartUpload(); this.partETagsFutures = new ArrayList<>(2); LOG.debug("Initiated multi-part upload for {} with " + @@ -460,14 +472,16 @@ private void uploadBlockAsync(final S3ADataBlocks.DataBlock block) throws IOException { LOG.debug("Queueing upload of {}", block); final int size = block.dataSize(); - final InputStream uploadStream = block.startUpload(); + final S3ADataBlocks.BlockUploadData uploadData = block.startUpload(); final int currentPartNumber = partETagsFutures.size() + 1; final UploadPartRequest request = writeOperationHelper.newUploadPartRequest( uploadId, - uploadStream, currentPartNumber, - size); + size, + uploadData.getUploadStream(), + uploadData.getFile()); + long transferQueueTime = now(); BlockUploadProgress callback = new BlockUploadProgress( @@ -482,12 +496,16 @@ public PartETag call() throws Exception { LOG.debug("Uploading part {} for id '{}'", currentPartNumber, uploadId); // do the upload - PartETag partETag = fs.uploadPart(request).getPartETag(); - LOG.debug("Completed upload of {}", block); - LOG.debug("Stream statistics of {}", statistics); - - // close the block - block.close(); + PartETag partETag; + try { + partETag = fs.uploadPart(request).getPartETag(); + LOG.debug("Completed upload of {} to part {}", block, + partETag.getETag()); + LOG.debug("Stream statistics of {}", statistics); + } finally { + // close the stream and block + closeAll(LOG, uploadData, block); + } return partETag; } }); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java index 0fe2af7..9bc8dcd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ADataBlocks.java @@ -24,10 +24,8 @@ import java.io.Closeable; import java.io.EOFException; import java.io.File; -import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; -import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; @@ -42,10 +40,11 @@ import org.apache.hadoop.util.DirectBufferPool; import static org.apache.hadoop.fs.s3a.S3ADataBlocks.DataBlock.DestState.*; +import static org.apache.hadoop.fs.s3a.S3AUtils.closeAll; /** * Set of classes to support output streaming into blocks which are then - * uploaded as partitions. + * uploaded as to S3 as a single PUT, or as part of a multipart request. */ final class S3ADataBlocks { @@ -97,6 +96,70 @@ static BlockFactory createFactory(S3AFileSystem owner, } /** + * The output information for an upload. + * It can be one of a file or an input stream. + * When closed, any stream is closed. Any source file is untouched. + */ + static final class BlockUploadData implements Closeable { + private final File file; + private final InputStream uploadStream; + + /** + * File constructor; input stream will be null. 
+ * @param file file to upload + */ + BlockUploadData(File file) { + Preconditions.checkArgument(file.exists(), "No file: " + file); + this.file = file; + this.uploadStream = null; + } + + /** + * Stream constructor, file field will be null. + * @param uploadStream stream to upload + */ + BlockUploadData(InputStream uploadStream) { + Preconditions.checkNotNull(uploadStream, "rawUploadStream"); + this.uploadStream = uploadStream; + this.file = null; + } + + /** + * Predicate: does this instance contain a file reference. + * @return true if there is a file. + */ + boolean hasFile() { + return file != null; + } + + /** + * Get the file, if there is one. + * @return the file for uploading, or null. + */ + File getFile() { + return file; + } + + /** + * Get the raw upload stream, if the object was + * created with one. + * @return the upload stream or null. + */ + InputStream getUploadStream() { + return uploadStream; + } + + /** + * Close: closes any upload stream provided in the constructor. + * @throws IOException inherited exception + */ + @Override + public void close() throws IOException { + closeAll(LOG, uploadStream); + } + } + + /** * Base class for block factories. */ static abstract class BlockFactory implements Closeable { @@ -110,15 +173,21 @@ protected BlockFactory(S3AFileSystem owner) { /** * Create a block. + * + * @param index index of block * @param limit limit of the block. + * @param statistics stats to work with * @return a new block. */ - abstract DataBlock create(int limit) throws IOException; + abstract DataBlock create(long index, int limit, + S3AInstrumentation.OutputStreamStatistics statistics) + throws IOException; /** * Implement any close/cleanup operation. * Base class is a no-op - * @throws IOException -ideally, it shouldn't. + * @throws IOException Inherited exception; implementations should + * avoid raising it. */ @Override public void close() throws IOException { @@ -140,6 +209,14 @@ protected S3AFileSystem getOwner() { enum DestState {Writing, Upload, Closed} private volatile DestState state = Writing; + protected final long index; + protected final S3AInstrumentation.OutputStreamStatistics statistics; + + protected DataBlock(long index, + S3AInstrumentation.OutputStreamStatistics statistics) { + this.index = index; + this.statistics = statistics; + } /** * Atomically enter a state, verifying current state. @@ -243,8 +320,8 @@ void flush() throws IOException { * @return the stream * @throws IOException trouble */ - InputStream startUpload() throws IOException { - LOG.debug("Start datablock upload"); + BlockUploadData startUpload() throws IOException { + LOG.debug("Start datablock[{}] upload", index); enterState(Writing, Upload); return null; } @@ -278,6 +355,23 @@ protected void innerClose() throws IOException { } + /** + * A block has been allocated. + */ + protected void blockAllocated() { + if (statistics != null) { + statistics.blockAllocated(); + } + } + + /** + * A block has been released. 
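The closeAll(LOG, ...) calls introduced in these hunks quietly close every argument so that one failed close cannot mask the others. A generic helper of that shape is sketched below as an assumed illustration; the real S3AUtils method may differ in detail.

    // Sketch: close several resources, logging and swallowing per-resource errors.
    import java.io.Closeable;
    import org.slf4j.Logger;

    final class QuietCloser {
      private QuietCloser() {
      }

      static void closeAll(Logger log, Closeable... closeables) {
        for (Closeable c : closeables) {
          if (c == null) {
            continue;
          }
          try {
            c.close();
          } catch (Exception e) {
            // keep going; a failure to close one resource must not skip the rest
            log.debug("Exception closing {}", c, e);
          }
        }
      }
    }
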
+ */ + protected void blockReleased() { + if (statistics != null) { + statistics.blockReleased(); + } + } } // ==================================================================== @@ -292,12 +386,33 @@ protected void innerClose() throws IOException { } @Override - DataBlock create(int limit) throws IOException { - return new ByteArrayBlock(limit); + DataBlock create(long index, int limit, + S3AInstrumentation.OutputStreamStatistics statistics) + throws IOException { + return new ByteArrayBlock(0, limit, statistics); } } + static class S3AByteArrayOutputStream extends ByteArrayOutputStream { + + S3AByteArrayOutputStream(int size) { + super(size); + } + + /** + * InputStream backed by the internal byte array + * + * @return + */ + ByteArrayInputStream getInputStream() { + ByteArrayInputStream bin = new ByteArrayInputStream(this.buf, 0, count); + this.reset(); + this.buf = null; + return bin; + } + } + /** * Stream to memory via a {@code ByteArrayOutputStream}. * @@ -310,14 +425,18 @@ DataBlock create(int limit) throws IOException { */ static class ByteArrayBlock extends DataBlock { - private ByteArrayOutputStream buffer; + private S3AByteArrayOutputStream buffer; private final int limit; // cache data size so that it is consistent after the buffer is reset. private Integer dataSize; - ByteArrayBlock(int limit) { + ByteArrayBlock(long index, + int limit, + S3AInstrumentation.OutputStreamStatistics statistics) { + super(index, statistics); this.limit = limit; - buffer = new ByteArrayOutputStream(); + buffer = new S3AByteArrayOutputStream(limit); + blockAllocated(); } /** @@ -330,13 +449,12 @@ int dataSize() { } @Override - InputStream startUpload() throws IOException { + BlockUploadData startUpload() throws IOException { super.startUpload(); dataSize = buffer.size(); - ByteArrayInputStream bufferData = new ByteArrayInputStream( - buffer.toByteArray()); + ByteArrayInputStream bufferData = buffer.getInputStream(); buffer = null; - return bufferData; + return new BlockUploadData(bufferData); } @Override @@ -360,12 +478,14 @@ int write(byte[] b, int offset, int len) throws IOException { @Override protected void innerClose() { buffer = null; + blockReleased(); } @Override public String toString() { - return "ByteArrayBlock{" + - "state=" + getState() + + return "ByteArrayBlock{" + +"index=" + index + + ", state=" + getState() + ", limit=" + limit + ", dataSize=" + dataSize + '}'; @@ -377,12 +497,6 @@ public String toString() { /** * Stream via Direct ByteBuffers; these are allocated off heap * via {@link DirectBufferPool}. - * This is actually the most complex of all the block factories, - * due to the need to explicitly recycle buffers; in comparison, the - * {@link DiskBlock} buffer delegates the work of deleting files to - * the {@link DiskBlock.FileDeletingInputStream}. Here the - * input stream {@link ByteBufferInputStream} has a similar task, along - * with the foundational work of streaming data from a byte array. 
*/ static class ByteBufferBlockFactory extends BlockFactory { @@ -395,8 +509,10 @@ public String toString() { } @Override - ByteBufferBlock create(int limit) throws IOException { - return new ByteBufferBlock(limit); + ByteBufferBlock create(long index, int limit, + S3AInstrumentation.OutputStreamStatistics statistics) + throws IOException { + return new ByteBufferBlock(index, limit, statistics); } private ByteBuffer requestBuffer(int limit) { @@ -428,21 +544,27 @@ public String toString() { /** * A DataBlock which requests a buffer from pool on creation; returns - * it when the output stream is closed. + * it when it is closed. */ class ByteBufferBlock extends DataBlock { - private ByteBuffer buffer; + private ByteBuffer blockBuffer; private final int bufferSize; // cache data size so that it is consistent after the buffer is reset. private Integer dataSize; /** * Instantiate. This will request a ByteBuffer of the desired size. + * @param index block index * @param bufferSize buffer size + * @param statistics statistics to update */ - ByteBufferBlock(int bufferSize) { + ByteBufferBlock(long index, + int bufferSize, + S3AInstrumentation.OutputStreamStatistics statistics) { + super(index, statistics); this.bufferSize = bufferSize; - buffer = requestBuffer(bufferSize); + blockBuffer = requestBuffer(bufferSize); + blockAllocated(); } /** @@ -455,13 +577,14 @@ int dataSize() { } @Override - ByteBufferInputStream startUpload() throws IOException { + BlockUploadData startUpload() throws IOException { super.startUpload(); dataSize = bufferCapacityUsed(); // set the buffer up from reading from the beginning - buffer.limit(buffer.position()); - buffer.position(0); - return new ByteBufferInputStream(dataSize, buffer); + blockBuffer.limit(blockBuffer.position()); + blockBuffer.position(0); + return new BlockUploadData( + new ByteBufferInputStream(dataSize, blockBuffer)); } @Override @@ -471,182 +594,190 @@ public boolean hasCapacity(long bytes) { @Override public int remainingCapacity() { - return buffer != null ? buffer.remaining() : 0; + return blockBuffer != null ? blockBuffer.remaining() : 0; } private int bufferCapacityUsed() { - return buffer.capacity() - buffer.remaining(); + return blockBuffer.capacity() - blockBuffer.remaining(); } @Override int write(byte[] b, int offset, int len) throws IOException { super.write(b, offset, len); int written = Math.min(remainingCapacity(), len); - buffer.put(b, offset, written); + blockBuffer.put(b, offset, written); return written; } + /** + * Closing the block will release the buffer. + */ @Override protected void innerClose() { - buffer = null; + if (blockBuffer != null) { + blockReleased(); + releaseBuffer(blockBuffer); + blockBuffer = null; + } } @Override public String toString() { return "ByteBufferBlock{" - + "state=" + getState() + + + "index=" + index + + ", state=" + getState() + ", dataSize=" + dataSize() + ", limit=" + bufferSize + ", remainingCapacity=" + remainingCapacity() + '}'; } - } - - /** - * Provide an input stream from a byte buffer; supporting - * {@link #mark(int)}, which is required to enable replay of failed - * PUT attempts. - * This input stream returns the buffer to the pool afterwards. - */ - class ByteBufferInputStream extends InputStream { + /** + * Provide an input stream from a byte buffer; supporting + * {@link #mark(int)}, which is required to enable replay of failed + * PUT attempts. 
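Mark/reset support matters because the AWS client may need to re-send a part after a transient failure. The replay contract can be demonstrated with a plain JDK stream; this is an illustrative sketch in which a ByteArrayInputStream stands in for the buffer-backed stream defined below.

    // Sketch: replaying a markSupported() stream after a failed send attempt.
    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.io.InputStream;
    import java.nio.charset.StandardCharsets;

    public class ReplayDemo {
      public static void main(String[] args) throws IOException {
        InputStream in =
            new ByteArrayInputStream("part-payload".getBytes(StandardCharsets.UTF_8));
        in.mark(Integer.MAX_VALUE);   // remember the start of the part
        in.read(new byte[4]);         // first attempt consumes some bytes, then "fails"
        in.reset();                   // rewind so the retry sees the full payload again
        byte[] retry = new byte[12];
        int read = in.read(retry);
        System.out.println(read + " bytes available on retry");
      }
    }
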
+ */ + class ByteBufferInputStream extends InputStream { - private final int size; - private ByteBuffer byteBuffer; + private final int size; + private ByteBuffer byteBuffer; - ByteBufferInputStream(int size, ByteBuffer byteBuffer) { - LOG.debug("Creating ByteBufferInputStream of size {}", size); - this.size = size; - this.byteBuffer = byteBuffer; - } + ByteBufferInputStream(int size, + ByteBuffer byteBuffer) { + LOG.debug("Creating ByteBufferInputStream of size {}", size); + this.size = size; + this.byteBuffer = byteBuffer; + } - /** - * Return the buffer to the pool after the stream is closed. - */ - @Override - public synchronized void close() { - if (byteBuffer != null) { - LOG.debug("releasing buffer"); - releaseBuffer(byteBuffer); + /** + * After the stream is closed, set the local reference to the byte + * buffer to null; this guarantees that future attempts to use + * stream methods will fail. + */ + @Override + public synchronized void close() { + LOG.debug("ByteBufferInputStream.close() for {}", + ByteBufferBlock.super.toString()); byteBuffer = null; } - } - /** - * Verify that the stream is open. - * @throws IOException if the stream is closed - */ - private void verifyOpen() throws IOException { - if (byteBuffer == null) { - throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); + /** + * Verify that the stream is open. + * @throws IOException if the stream is closed + */ + private void verifyOpen() throws IOException { + if (byteBuffer == null) { + throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); + } } - } - public synchronized int read() throws IOException { - if (available() > 0) { - return byteBuffer.get() & 0xFF; - } else { - return -1; + public synchronized int read() throws IOException { + if (available() > 0) { + return byteBuffer.get() & 0xFF; + } else { + return -1; + } } - } - @Override - public synchronized long skip(long offset) throws IOException { - verifyOpen(); - long newPos = position() + offset; - if (newPos < 0) { - throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK); + @Override + public synchronized long skip(long offset) throws IOException { + verifyOpen(); + long newPos = position() + offset; + if (newPos < 0) { + throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK); + } + if (newPos > size) { + throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); + } + byteBuffer.position((int) newPos); + return newPos; } - if (newPos > size) { - throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); + + @Override + public synchronized int available() { + Preconditions.checkState(byteBuffer != null, + FSExceptionMessages.STREAM_IS_CLOSED); + return byteBuffer.remaining(); } - byteBuffer.position((int) newPos); - return newPos; - } - @Override - public synchronized int available() { - Preconditions.checkState(byteBuffer != null, - FSExceptionMessages.STREAM_IS_CLOSED); - return byteBuffer.remaining(); - } + /** + * Get the current buffer position. + * @return the buffer position + */ + public synchronized int position() { + return byteBuffer.position(); + } - /** - * Get the current buffer position. - * @return the buffer position - */ - public synchronized int position() { - return byteBuffer.position(); - } + /** + * Check if there is data left. + * @return true if there is data remaining in the buffer. + */ + public synchronized boolean hasRemaining() { + return byteBuffer.hasRemaining(); + } - /** - * Check if there is data left. - * @return true if there is data remaining in the buffer. 
- */ - public synchronized boolean hasRemaining() { - return byteBuffer.hasRemaining(); - } + @Override + public synchronized void mark(int readlimit) { + LOG.debug("mark at {}", position()); + byteBuffer.mark(); + } - @Override - public synchronized void mark(int readlimit) { - LOG.debug("mark at {}", position()); - byteBuffer.mark(); - } + @Override + public synchronized void reset() throws IOException { + LOG.debug("reset"); + byteBuffer.reset(); + } - @Override - public synchronized void reset() throws IOException { - LOG.debug("reset"); - byteBuffer.reset(); - } + @Override + public boolean markSupported() { + return true; + } - @Override - public boolean markSupported() { - return true; - } + /** + * Read in data. + * @param b destination buffer + * @param offset offset within the buffer + * @param length length of bytes to read + * @throws EOFException if the position is negative + * @throws IndexOutOfBoundsException if there isn't space for the + * amount of data requested. + * @throws IllegalArgumentException other arguments are invalid. + */ + @SuppressWarnings("NullableProblems") + public synchronized int read(byte[] b, int offset, int length) + throws IOException { + Preconditions.checkArgument(length >= 0, "length is negative"); + Preconditions.checkArgument(b != null, "Null buffer"); + if (b.length - offset < length) { + throw new IndexOutOfBoundsException( + FSExceptionMessages.TOO_MANY_BYTES_FOR_DEST_BUFFER + + ": request length =" + length + + ", with offset =" + offset + + "; buffer capacity =" + (b.length - offset)); + } + verifyOpen(); + if (!hasRemaining()) { + return -1; + } - /** - * Read in data. - * @param buffer destination buffer - * @param offset offset within the buffer - * @param length length of bytes to read - * @throws EOFException if the position is negative - * @throws IndexOutOfBoundsException if there isn't space for the - * amount of data requested. - * @throws IllegalArgumentException other arguments are invalid. 
- */ - @SuppressWarnings("NullableProblems") - public synchronized int read(byte[] buffer, int offset, int length) - throws IOException { - Preconditions.checkArgument(length >= 0, "length is negative"); - Preconditions.checkArgument(buffer != null, "Null buffer"); - if (buffer.length - offset < length) { - throw new IndexOutOfBoundsException( - FSExceptionMessages.TOO_MANY_BYTES_FOR_DEST_BUFFER - + ": request length =" + length - + ", with offset =" + offset - + "; buffer capacity =" + (buffer.length - offset)); - } - verifyOpen(); - if (!hasRemaining()) { - return -1; + int toRead = Math.min(length, available()); + byteBuffer.get(b, offset, toRead); + return toRead; } - int toRead = Math.min(length, available()); - byteBuffer.get(buffer, offset, toRead); - return toRead; - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder( - "ByteBufferInputStream{"); - sb.append("size=").append(size); - ByteBuffer buffer = this.byteBuffer; - if (buffer != null) { - sb.append(", available=").append(buffer.remaining()); + @Override + public String toString() { + final StringBuilder sb = new StringBuilder( + "ByteBufferInputStream{"); + sb.append("size=").append(size); + ByteBuffer buf = this.byteBuffer; + if (buf != null) { + sb.append(", available=").append(buf.remaining()); + } + sb.append(", ").append(ByteBufferBlock.super.toString()); + sb.append('}'); + return sb.toString(); } - sb.append('}'); - return sb.toString(); } } } @@ -663,22 +794,29 @@ public String toString() { } /** - * Create a temp file and a block which writes to it. + * Create a temp file and a {@link DiskBlock} instance to manage it. + * + * @param index block index * @param limit limit of the block. + * @param statistics statistics to update * @return the new block * @throws IOException IO problems */ @Override - DataBlock create(int limit) throws IOException { + DataBlock create(long index, + int limit, + S3AInstrumentation.OutputStreamStatistics statistics) + throws IOException { File destFile = getOwner() - .createTmpFileForWrite("s3ablock", limit, getOwner().getConf()); - return new DiskBlock(destFile, limit); + .createTmpFileForWrite(String.format("s3ablock-%04d-", index), + limit, getOwner().getConf()); + return new DiskBlock(destFile, limit, index, statistics); } } /** * Stream to a file. - * This will stop at the limit; the caller is expected to create a new block + * This will stop at the limit; the caller is expected to create a new block. 
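The disk-backed block defined in the next hunk buffers to a local file, hands the file itself to the upload, and deletes it only when the block is closed. The overall life cycle, reduced to standard java.io calls, is roughly as follows; uploadFile(...) is a placeholder for the S3 PUT or part upload.

    // Sketch of the disk-buffered block life cycle.
    import java.io.BufferedOutputStream;
    import java.io.File;
    import java.io.FileOutputStream;
    import java.io.IOException;

    final class DiskBufferedBlockSketch {
      static void writeAndUpload(byte[] data) throws IOException {
        File buffer = File.createTempFile("s3ablock-", ".tmp");
        try (BufferedOutputStream out =
                 new BufferedOutputStream(new FileOutputStream(buffer))) {
          out.write(data);            // buffering phase, bounded by the block limit
        }
        try {
          uploadFile(buffer);         // upload phase: the file outlives the stream
        } finally {
          if (!buffer.delete() && buffer.exists()) {
            System.err.println("delete(" + buffer + ") returned false");
          }
        }
      }

      private static void uploadFile(File file) {
        // placeholder: in S3A this becomes a PutObjectRequest/UploadPartRequest
      }
    }
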
*/ static class DiskBlock extends DataBlock { @@ -686,12 +824,17 @@ DataBlock create(int limit) throws IOException { private final File bufferFile; private final int limit; private BufferedOutputStream out; - private InputStream uploadStream; + private final AtomicBoolean closed = new AtomicBoolean(false); - DiskBlock(File bufferFile, int limit) + DiskBlock(File bufferFile, + int limit, + long index, + S3AInstrumentation.OutputStreamStatistics statistics) throws FileNotFoundException { + super(index, statistics); this.limit = limit; this.bufferFile = bufferFile; + blockAllocated(); out = new BufferedOutputStream(new FileOutputStream(bufferFile)); } @@ -720,7 +863,7 @@ int write(byte[] b, int offset, int len) throws IOException { } @Override - InputStream startUpload() throws IOException { + BlockUploadData startUpload() throws IOException { super.startUpload(); try { out.flush(); @@ -728,8 +871,7 @@ InputStream startUpload() throws IOException { out.close(); out = null; } - uploadStream = new FileInputStream(bufferFile); - return new FileDeletingInputStream(uploadStream); + return new BlockUploadData(bufferFile); } /** @@ -737,6 +879,7 @@ InputStream startUpload() throws IOException { * exists. * @throws IOException IO problems */ + @SuppressWarnings("UnnecessaryDefault") @Override protected void innerClose() throws IOException { final DestState state = getState(); @@ -745,20 +888,19 @@ protected void innerClose() throws IOException { case Writing: if (bufferFile.exists()) { // file was not uploaded - LOG.debug("Deleting buffer file as upload did not start"); - boolean deleted = bufferFile.delete(); - if (!deleted && bufferFile.exists()) { - LOG.warn("Failed to delete buffer file {}", bufferFile); - } + LOG.debug("Block[{}]: Deleting buffer file as upload did not start", + index); + closeBlock(); } break; case Upload: - LOG.debug("Buffer file {} exists —close upload stream", bufferFile); + LOG.debug("Block[{}]: Buffer file {} exists —close upload stream", + index, bufferFile); break; case Closed: - // no-op + closeBlock(); break; default: @@ -780,7 +922,8 @@ void flush() throws IOException { @Override public String toString() { String sb = "FileBlock{" - + "destFile=" + bufferFile + + + "index=" + index + + ", destFile=" + bufferFile + ", state=" + getState() + ", dataSize=" + dataSize() + ", limit=" + limit + @@ -789,31 +932,20 @@ public String toString() { } /** - * An input stream which deletes the buffer file when closed. + * Close the block. + * This will delete the block's buffer file if the block has + * not previously been closed. */ - private final class FileDeletingInputStream extends FilterInputStream { - private final AtomicBoolean closed = new AtomicBoolean(false); - - FileDeletingInputStream(InputStream source) { - super(source); - } - - /** - * Delete the input file when closed. 
- * @throws IOException IO problem - */ - @Override - public void close() throws IOException { - try { - super.close(); - } finally { - if (!closed.getAndSet(true)) { - if (!bufferFile.delete()) { - LOG.warn("delete({}) returned false", - bufferFile.getAbsoluteFile()); - } - } + void closeBlock() { + LOG.debug("block[{}]: closeBlock()", index); + if (!closed.getAndSet(true)) { + blockReleased(); + if (!bufferFile.delete() && bufferFile.exists()) { + LOG.warn("delete({}) returned false", + bufferFile.getAbsoluteFile()); } + } else { + LOG.debug("block[{}]: skipping re-entrant closeBlock()", index); } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 5ac18a9..acd1da1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -29,7 +29,10 @@ import java.util.EnumSet; import java.util.List; import java.util.Map; +import java.util.concurrent.ExecutorService; import java.util.Objects; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; @@ -128,7 +131,8 @@ private long partSize; private boolean enableMultiObjectsDelete; private TransferManager transfers; - private ListeningExecutorService threadPoolExecutor; + private ListeningExecutorService boundedThreadPool; + private ExecutorService unboundedThreadPool; private long multiPartThreshold; public static final Logger LOG = LoggerFactory.getLogger(S3AFileSystem.class); private static final Logger PROGRESS = @@ -152,21 +156,28 @@ /** Called after a new FileSystem instance is constructed. * @param name a uri whose authority section names the host, port, etc. * for this FileSystem - * @param conf the configuration + * @param originalConf the configuration to use for the FS. The + * bucket-specific options are patched over the base ones before any use is + * made of the config. */ - public void initialize(URI name, Configuration conf) throws IOException { + public void initialize(URI name, Configuration originalConf) + throws IOException { + uri = S3xLoginHelper.buildFSURI(name); + // get the host; this is guaranteed to be non-null, non-empty + bucket = name.getHost(); + // clone the configuration into one with propagated bucket options + Configuration conf = propagateBucketOptions(originalConf, bucket); + patchSecurityCredentialProviders(conf); super.initialize(name, conf); setConf(conf); try { instrumentation = new S3AInstrumentation(name); - uri = S3xLoginHelper.buildFSURI(name); // Username is the current user at the time the FS was instantiated. 
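The propagateBucketOptions(originalConf, bucket) call a few lines above rewrites keys of the form fs.s3a.bucket.BUCKET.option onto the generic fs.s3a.option names before the rest of initialize() runs. A simplified sketch of that copy is given below; it is an assumption-level illustration, and the shipped method additionally skips a small set of unmodifiable keys and records the originating property for diagnostics.

    // Sketch: copy per-bucket overrides onto the generic S3A keys of a cloned config.
    import java.util.Map;
    import org.apache.hadoop.conf.Configuration;

    final class BucketOptionPropagationSketch {
      static Configuration propagate(Configuration source, String bucket) {
        Configuration dest = new Configuration(source);  // leave the caller's config untouched
        String bucketPrefix = "fs.s3a.bucket." + bucket + ".";
        for (Map.Entry<String, String> entry : source) {
          String key = entry.getKey();
          if (key.startsWith(bucketPrefix)) {
            String generic = "fs.s3a." + key.substring(bucketPrefix.length());
            dest.set(generic, entry.getValue());
          }
        }
        return dest;
      }
    }
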
username = UserGroupInformation.getCurrentUser().getShortUserName(); workingDir = new Path("/user", username) .makeQualified(this.uri, this.getWorkingDirectory()); - bucket = name.getHost(); Class s3ClientFactoryClass = conf.getClass( S3_CLIENT_FACTORY_IMPL, DEFAULT_S3_CLIENT_FACTORY_IMPL, @@ -206,11 +217,17 @@ public StorageStatistics provide() { MAX_TOTAL_TASKS, DEFAULT_MAX_TOTAL_TASKS, 1); long keepAliveTime = longOption(conf, KEEPALIVE_TIME, DEFAULT_KEEPALIVE_TIME, 0); - threadPoolExecutor = BlockingThreadPoolExecutorService.newInstance( + boundedThreadPool = BlockingThreadPoolExecutorService.newInstance( maxThreads, maxThreads + totalTasks, keepAliveTime, TimeUnit.SECONDS, "s3a-transfer-shared"); + unboundedThreadPool = new ThreadPoolExecutor( + maxThreads, Integer.MAX_VALUE, + keepAliveTime, TimeUnit.SECONDS, + new LinkedBlockingQueue(), + BlockingThreadPoolExecutorService.newDaemonThreadFactory( + "s3a-transfer-unbounded")); initTransferManager(); @@ -288,7 +305,7 @@ private void initTransferManager() { transferConfiguration.setMultipartCopyPartSize(partSize); transferConfiguration.setMultipartCopyThreshold(multiPartThreshold); - transfers = new TransferManager(s3, threadPoolExecutor); + transfers = new TransferManager(s3, unboundedThreadPool); transfers.setConfiguration(transferConfiguration); } @@ -557,7 +574,7 @@ public FSDataOutputStream create(Path f, FsPermission permission, output = new FSDataOutputStream( new S3ABlockOutputStream(this, key, - new SemaphoredDelegatingExecutor(threadPoolExecutor, + new SemaphoredDelegatingExecutor(boundedThreadPool, blockOutputActiveBlocks, true), progress, partSize, @@ -976,6 +993,7 @@ private void deleteObjects(DeleteObjectsRequest deleteRequest) { */ public PutObjectRequest newPutObjectRequest(String key, ObjectMetadata metadata, File srcfile) { + Preconditions.checkNotNull(srcfile); PutObjectRequest putObjectRequest = new PutObjectRequest(bucket, key, srcfile); putObjectRequest.setCannedAcl(cannedACL); @@ -992,8 +1010,9 @@ public PutObjectRequest newPutObjectRequest(String key, * @param inputStream source data. * @return the request */ - PutObjectRequest newPutObjectRequest(String key, + private PutObjectRequest newPutObjectRequest(String key, ObjectMetadata metadata, InputStream inputStream) { + Preconditions.checkNotNull(inputStream); PutObjectRequest putObjectRequest = new PutObjectRequest(bucket, key, inputStream, metadata); putObjectRequest.setCannedAcl(cannedACL); @@ -1031,12 +1050,16 @@ public ObjectMetadata newObjectMetadata(long length) { } /** - * PUT an object, incrementing the put requests and put bytes + * Start a transfer-manager managed async PUT of an object, + * incrementing the put requests and put bytes * counters. * It does not update the other counters, * as existing code does that as progress callbacks come in. * Byte length is calculated from the file length, or, if there is no * file, from the content length of the header. + * Because the operation is async, any stream supplied in the request + * must reference data (files, buffers) which stay valid until the upload + * completes. * @param putObjectRequest the request * @return the upload initiated */ @@ -1062,6 +1085,7 @@ public Upload putObject(PutObjectRequest putObjectRequest) { * PUT an object directly (i.e. not via the transfer manager). * Byte length is calculated from the file length, or, if there is no * file, from the content length of the header. + * Important: this call will close any input stream in the request. 
* @param putObjectRequest the request * @return the upload initiated * @throws AmazonClientException on problems @@ -1087,7 +1111,8 @@ public PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest) /** * Upload part of a multi-partition file. - * Increments the write and put counters + * Increments the write and put counters. + * Important: this call does not close any input stream in the request. * @param request request * @return the result of the operation. * @throws AmazonClientException on problems @@ -1948,7 +1973,8 @@ public String toString() { if (blockFactory != null) { sb.append(", blockFactory=").append(blockFactory); } - sb.append(", executor=").append(threadPoolExecutor); + sb.append(", boundedExecutor=").append(boundedThreadPool); + sb.append(", unboundedExecutor=").append(unboundedThreadPool); sb.append(", statistics {") .append(statistics) .append("}"); @@ -2184,14 +2210,28 @@ private WriteOperationHelper(String key) { /** * Create a {@link PutObjectRequest} request. - * The metadata is assumed to have been configured with the size of the - * operation. + * If {@code length} is set, the metadata is configured with the size of + * the upload. * @param inputStream source data. * @param length size, if known. Use -1 for not known * @return the request */ PutObjectRequest newPutRequest(InputStream inputStream, long length) { - return newPutObjectRequest(key, newObjectMetadata(length), inputStream); + PutObjectRequest request = newPutObjectRequest(key, + newObjectMetadata(length), inputStream); + return request; + } + + /** + * Create a {@link PutObjectRequest} request to upload a file. + * @param sourceFile source file + * @return the request + */ + PutObjectRequest newPutRequest(File sourceFile) { + int length = (int) sourceFile.length(); + PutObjectRequest request = newPutObjectRequest(key, + newObjectMetadata(length), sourceFile); + return request; } /** @@ -2253,6 +2293,8 @@ CompleteMultipartUploadResult completeMultipartUpload(String uploadId, Preconditions.checkNotNull(partETags); Preconditions.checkArgument(!partETags.isEmpty(), "No partitions have been uploaded"); + LOG.debug("Completing multipart upload {} with {} parts", + uploadId, partETags.size()); return s3.completeMultipartUpload( new CompleteMultipartUploadRequest(bucket, key, @@ -2263,42 +2305,51 @@ CompleteMultipartUploadResult completeMultipartUpload(String uploadId, /** * Abort a multipart upload operation. * @param uploadId multipart operation Id - * @return the result * @throws AmazonClientException on problems. */ void abortMultipartUpload(String uploadId) throws AmazonClientException { + LOG.debug("Aborting multipart upload {}", uploadId); s3.abortMultipartUpload( new AbortMultipartUploadRequest(bucket, key, uploadId)); } /** * Create and initialize a part request of a multipart upload. + * Exactly one of: {@code uploadStream} or {@code sourceFile} + * must be specified. * @param uploadId ID of ongoing upload - * @param uploadStream source of data to upload * @param partNumber current part number of the upload * @param size amount of data + * @param uploadStream source of data to upload + * @param sourceFile optional source file. * @return the request. 
*/ UploadPartRequest newUploadPartRequest(String uploadId, - InputStream uploadStream, - int partNumber, - int size) { + int partNumber, int size, InputStream uploadStream, File sourceFile) { Preconditions.checkNotNull(uploadId); - Preconditions.checkNotNull(uploadStream); + // exactly one source must be set; xor verifies this + Preconditions.checkArgument((uploadStream != null) ^ (sourceFile != null), + "Data source"); Preconditions.checkArgument(size > 0, "Invalid partition size %s", size); - Preconditions.checkArgument(partNumber> 0 && partNumber <=10000, + Preconditions.checkArgument(partNumber > 0 && partNumber <= 10000, "partNumber must be between 1 and 10000 inclusive, but is %s", partNumber); LOG.debug("Creating part upload request for {} #{} size {}", uploadId, partNumber, size); - return new UploadPartRequest() + UploadPartRequest request = new UploadPartRequest() .withBucketName(bucket) .withKey(key) .withUploadId(uploadId) - .withInputStream(uploadStream) .withPartNumber(partNumber) .withPartSize(size); + if (uploadStream != null) { + // there's an upload stream. Bind to it. + request.setInputStream(uploadStream); + } else { + request.setFile(sourceFile); + } + return request; } /** @@ -2313,6 +2364,21 @@ public String toString() { sb.append('}'); return sb.toString(); } + + /** + * PUT an object directly (i.e. not via the transfer manager). + * @param putObjectRequest the request + * @return the upload initiated + * @throws IOException on problems + */ + PutObjectResult putObject(PutObjectRequest putObjectRequest) + throws IOException { + try { + return putObjectDirect(putObjectRequest); + } catch (AmazonClientException e) { + throw translateException("put", putObjectRequest.getKey(), e); + } + } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java index fb8c852..d2e7a88 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInstrumentation.java @@ -36,6 +36,7 @@ import java.util.HashMap; import java.util.Map; import java.util.UUID; +import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.fs.FileSystem.Statistics; @@ -428,7 +429,7 @@ public void decrementGauge(Statistic op, long count) { if (gauge != null) { gauge.decr(count); } else { - LOG.debug("No Gauge: " + op); + LOG.debug("No Gauge: {}", op); } } @@ -676,6 +677,8 @@ private void mergeOutputStreamStatistics(OutputStreamStatistics statistics) { private final AtomicLong transferDuration = new AtomicLong(0); private final AtomicLong queueDuration = new AtomicLong(0); private final AtomicLong exceptionsInMultipartFinalize = new AtomicLong(0); + private final AtomicInteger blocksAllocated = new AtomicInteger(0); + private final AtomicInteger blocksReleased = new AtomicInteger(0); private Statistics statistics; @@ -684,6 +687,20 @@ public OutputStreamStatistics(Statistics statistics){ } /** + * A block has been allocated. + */ + void blockAllocated() { + blocksAllocated.incrementAndGet(); + } + + /** + * A block has been released. + */ + void blockReleased() { + blocksReleased.incrementAndGet(); + } + + /** * Block is queued for upload. 
*/ void blockUploadQueued(int blockSize) { @@ -778,6 +795,24 @@ long totalUploadDuration() { return queueDuration.get() + transferDuration.get(); } + public int blocksAllocated() { + return blocksAllocated.get(); + } + + public int blocksReleased() { + return blocksReleased.get(); + } + + /** + * Get counters of blocks actively allocated; may be inaccurate + * if the numbers change during the (non-synchronized) calculation. + * @return the number of actively allocated blocks. + */ + public int blocksActivelyAllocated() { + return blocksAllocated.get() - blocksReleased.get(); + } + + @Override public String toString() { final StringBuilder sb = new StringBuilder( @@ -789,6 +824,9 @@ public String toString() { sb.append(", blockUploadsFailed=").append(blockUploadsFailed); sb.append(", bytesPendingUpload=").append(bytesPendingUpload); sb.append(", bytesUploaded=").append(bytesUploaded); + sb.append(", blocksAllocated=").append(blocksAllocated); + sb.append(", blocksReleased=").append(blocksReleased); + sb.append(", blocksActivelyAllocated=").append(blocksActivelyAllocated()); sb.append(", exceptionsInMultipartFinalize=").append( exceptionsInMultipartFinalize); sb.append(", transferDuration=").append(transferDuration).append(" ms"); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index aeb8403..77e9d18 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -35,6 +35,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.security.ProviderUtils; + +import com.google.common.collect.Lists; import org.slf4j.Logger; import java.io.EOFException; @@ -46,15 +48,13 @@ import java.lang.reflect.Modifier; import java.net.URI; import java.nio.file.AccessDeniedException; +import java.util.Collection; import java.util.Date; +import java.util.List; import java.util.Map; import java.util.concurrent.ExecutionException; -import static org.apache.hadoop.fs.s3a.Constants.ACCESS_KEY; -import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER; -import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; -import static org.apache.hadoop.fs.s3a.Constants.MULTIPART_MIN_SIZE; -import static org.apache.hadoop.fs.s3a.Constants.SECRET_KEY; +import static org.apache.hadoop.fs.s3a.Constants.*; /** * Utility methods for S3A code. @@ -74,6 +74,13 @@ "is abstract and therefore cannot be created"; static final String ENDPOINT_KEY = "Endpoint"; + /** + * Core property for provider path. Duplicated here for consistent + * code across Hadoop versions: {@value}. + */ + static final String CREDENTIAL_PROVIDER_PATH = + "hadoop.security.credential.provider.path"; + private S3AUtils() { } @@ -636,4 +643,110 @@ private static Method getFactoryMethod(Class cl, Class returnType, return null; } } + + /** + * Propagates bucket-specific settings into generic S3A configuration keys. + * This is done by propagating the values of the form + * {@code fs.s3a.bucket.${bucket}.key} to + * {@code fs.s3a.key}, for all values of "key" other than a small set + * of unmodifiable values. + * + * The source of the updated property is set to the key name of the bucket + * property, to aid in diagnostics of where things came from. + * + * Returns a new configuration. Why the clone?
+ * You can use the same conf for different filesystems, and the original + * values are not updated. + * + * The {@code fs.s3a.impl} property cannot be set, nor can + * any with the prefix {@code fs.s3a.bucket}. + * + * This method does not propagate security provider path information from + * the S3A property into the Hadoop common provider: callers must call + * {@link #patchSecurityCredentialProviders(Configuration)} explicitly. + * @param source Source Configuration object. + * @param bucket bucket name. Must not be empty. + * @return a (potentially) patched clone of the original. + */ + public static Configuration propagateBucketOptions(Configuration source, + String bucket) { + + Preconditions.checkArgument(StringUtils.isNotEmpty(bucket), "bucket"); + final String bucketPrefix = FS_S3A_BUCKET_PREFIX + bucket +'.'; + LOG.debug("Propagating entries under {}", bucketPrefix); + final Configuration dest = new Configuration(source); + for (Map.Entry entry : source) { + final String key = entry.getKey(); + // get the (unexpanded) value. + final String value = entry.getValue(); + if (!key.startsWith(bucketPrefix) || bucketPrefix.equals(key)) { + continue; + } + // there's a bucket prefix, so strip it + final String stripped = key.substring(bucketPrefix.length()); + if (stripped.startsWith("bucket.") || "impl".equals(stripped)) { + //tell user off + LOG.debug("Ignoring bucket option {}", key); + } else { + // propagate the value, building a new origin field. + // to track overwrites, the generic key is overwritten even if + // already matches the new one. + final String generic = FS_S3A_PREFIX + stripped; + LOG.debug("Updating {}", generic); + dest.set(generic, value, key); + } + } + return dest; + } + + /** + * Patch the security credential provider information in + * {@link #CREDENTIAL_PROVIDER_PATH} + * with the providers listed in + * {@link Constants#S3A_SECURITY_CREDENTIAL_PROVIDER_PATH}. + * + * This allows different buckets to use different credential files. + * @param conf configuration to patch + */ + static void patchSecurityCredentialProviders(Configuration conf) { + Collection customCredentials = conf.getStringCollection( + S3A_SECURITY_CREDENTIAL_PROVIDER_PATH); + Collection hadoopCredentials = conf.getStringCollection( + CREDENTIAL_PROVIDER_PATH); + if (!customCredentials.isEmpty()) { + List all = Lists.newArrayList(customCredentials); + all.addAll(hadoopCredentials); + String joined = StringUtils.join(all, ','); + LOG.debug("Setting {} to {}", CREDENTIAL_PROVIDER_PATH, + joined); + conf.set(CREDENTIAL_PROVIDER_PATH, joined, + "patch of " + S3A_SECURITY_CREDENTIAL_PROVIDER_PATH); + } + } + + /** + * Close the Closeable objects and ignore any Exception or + * null pointers. + * (This is the SLF4J equivalent of that in {@code IOUtils}). + * @param log the log to log at debug level. Can be null. + * @param closeables the objects to close + */ + public static void closeAll(Logger log, + java.io.Closeable... 
closeables) { + for (java.io.Closeable c : closeables) { + if (c != null) { + try { + if (log != null) { + log.debug("Closing {}", c); + } + c.close(); + } catch (Exception e) { + if (log != null && log.isDebugEnabled()) { + log.debug("Exception in closing {}", c, e); + } + } + } + } + } + } diff --git a/hadoop-tools/hadoop-aws/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem b/hadoop-tools/hadoop-aws/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem index 0e3c42a..3cd1d6b 100644 --- a/hadoop-tools/hadoop-aws/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem +++ b/hadoop-tools/hadoop-aws/src/main/resources/META-INF/services/org.apache.hadoop.fs.FileSystem @@ -15,4 +15,3 @@ org.apache.hadoop.fs.s3.S3FileSystem org.apache.hadoop.fs.s3native.NativeS3FileSystem -org.apache.hadoop.fs.s3a.S3AFileSystem diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 0cb64a2..ede7eb4 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -284,6 +284,7 @@ properties, the Hadoop key management store and IAM roles. * Test suites includes distcp and suites in downstream projects. * Available since Hadoop 2.6; considered production ready in Hadoop 2.7. * Actively maintained. +* Supports per-bucket configuration. S3A is now the recommended client for working with S3 objects. It is also the one where patches for functionality and performance are very welcome. @@ -638,6 +639,29 @@ in XML configuration files. Because this property only supplies the path to the secrets file, the configuration option itself is no longer a sensitive item. +The property `hadoop.security.credential.provider.path` is global to all +filesystems and secrets. +There is another property, `fs.s3a.security.credential.provider.path` +which only lists credential providers for S3A filesystems. +The two properties are combined into one, with the list of providers in the +`fs.s3a.` property taking precedence +over that of the `hadoop.security` list (i.e. they are prepended to the common list). + +```xml + + fs.s3a.security.credential.provider.path + + + Optional comma separated list of credential providers, a list + which is prepended to that set in hadoop.security.credential.provider.path + + +``` + +Supporting a separate list in an `fs.s3a.` prefix permits per-bucket configuration +of credential files. + + ###### Using the credentials Once the provider is set in the Hadoop configuration, hadoop commands @@ -660,7 +684,7 @@ hadoop distcp \ hdfs://nn1.example.com:9001/user/backup/007020615 s3a://glacier1/ hadoop fs \ - -D hadoop.security.credential.provider.path=jceks://hdfs@nn1.example.com:9001/user/backup/s3.jceks \ + -D fs.s3a.security.credential.provider.path=jceks://hdfs@nn1.example.com:9001/user/backup/s3.jceks \ -ls s3a://glacier1/ ``` @@ -898,6 +922,78 @@ from placing its declaration on the command line. any call to setReadahead() is made to an open stream. +### Configuring different S3 buckets + +Different S3 buckets can be accessed with different S3A client configurations. +This allows for different endpoints, data read and write strategies, as well +as login details. + +1. All `fs.s3a` options other than a small set of unmodifiable values + (currently `fs.s3a.impl`) can be set on a per bucket basis. +1. 
The bucket specific option is set by replacing the `fs.s3a.` prefix on an option +with `fs.s3a.bucket.BUCKETNAME.`, where `BUCKETNAME` is the name of the bucket. +1. When connecting to a bucket, all options explicitly set will override +the base `fs.s3a.` values. + +As an example, a configuration could have a base configuration to use the IAM +role information available when deployed in Amazon EC2. + +```xml + + fs.s3a.aws.credentials.provider + org.apache.hadoop.fs.s3a.SharedInstanceProfileCredentialsProvider + +``` + +This will be the default authentication mechanism for S3A buckets. + +A bucket `s3a://nightly/` used for nightly data uses a session key: + +```xml + + fs.s3a.bucket.nightly.access.key + AKAACCESSKEY-2 + + + + fs.s3a.bucket.nightly.secret.key + SESSIONSECRETKEY + + + + fs.s3a.bucket.nightly.session.token + Short-lived-session-token + + + + fs.s3a.bucket.nightly.aws.credentials.provider + org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider + +``` + +Finally, the public `s3a://landsat-pds/` bucket is accessed anonymously: + +```xml + + fs.s3a.bucket.landsat-pds.aws.credentials.provider + org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider + +``` + +**Customizing S3A secrets held in credential files** + +Although most properties are automatically propagated from their +`fs.s3a.bucket.`-prefixed custom entry to that of the base `fs.s3a.` option +supporting secrets kept in Hadoop credential files is slightly more complex. +This is because the property values are kept in these files, and cannot be +dynamically patched. + +Instead, callers need to create different configuration files for each +bucket, setting the base secrets (`fs.s3a.bucket.nightly.access.key`, etc), +then declare the path to the appropriate credential file in +a bucket-specific version of the property `fs.s3a.security.credential.provider.path`. + + ### Working with buckets in different regions S3 Buckets are hosted in different regions, the default being US-East. @@ -953,6 +1049,16 @@ If the wrong endpoint is used, the request may fail. This may be reported as a 3 or as a 400 Bad Request. +If you are trying to mix endpoints for different buckets, use a per-bucket endpoint +declaration. For example: + +```xml + + fs.s3a.bucket.landsat-pds.endpoint + s3.amazonaws.com + The endpoint for s3a://landsat-pds URLs + +``` ### Stabilizing: S3A Fast Upload @@ -1632,15 +1738,15 @@ org.apache.hadoop.fs.s3a.AWSS3IOException: Received permanent redirect response 1. If not using "V4" authentication (see above), the original S3 endpoint can be used: -``` - - fs.s3a.endpoint - s3.amazonaws.com - +```xml + + fs.s3a.endpoint + s3.amazonaws.com + ``` -Using the explicit endpoint for the region is recommended for speed and the -ability to use the V4 signing API. +Using the explicit endpoint for the region is recommended for speed and +to use the V4 signing API. ### "Timeout waiting for connection from pool" when writing to S3A @@ -2226,32 +2332,33 @@ is hosted in Amazon's US-east datacenter. 1. If the property is set to a different path, then that data must be readable and "sufficiently" large. -To test on different S3 endpoints, or alternate infrastructures supporting -the same APIs, the option `fs.s3a.scale.test.csvfile` must either be -set to " ", or an object of at least 10MB is uploaded to the object store, and -the `fs.s3a.scale.test.csvfile` option set to its path. 
+(the reason the space or newline is needed is to add "an empty entry"; an empty +`` would be considered undefined and pick up the default) + +If using a test file in an S3 region requiring a different endpoint value +set in `fs.s3a.endpoint`, a bucket-specific endpoint must be defined. +For the default test dataset, hosted in the `landsat-pds` bucket, this is: ```xml - fs.s3a.scale.test.csvfile - + fs.s3a.bucket.landsat-pds.endpoint + s3.amazonaws.com + The endpoint for s3a://landsat-pds URLs ``` -(the reason the space or newline is needed is to add "an empty entry"; an empty -`` would be considered undefined and pick up the default) - -*Note:* if using a test file in an S3 region requiring a different endpoint value -set in `fs.s3a.endpoint`, define it in `fs.s3a.scale.test.csvfile.endpoint`. -If the default CSV file is used, the tests will automatically use the us-east -endpoint: +To test on alternate infrastructures supporting +the same APIs, the option `fs.s3a.scale.test.csvfile` must either be +set to " ", or an object of at least 10MB is uploaded to the object store, and +the `fs.s3a.scale.test.csvfile` option set to its path. ```xml - fs.s3a.scale.test.csvfile.endpoint - s3.amazonaws.com + fs.s3a.scale.test.csvfile + ``` + ### Viewing Integration Test Reports diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java index 819d9d8..1c32996 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java @@ -128,7 +128,6 @@ public void testAnonymousProvider() throws Exception { AnonymousAWSCredentialsProvider.class.getName()); Path testFile = new Path( conf.getTrimmed(KEY_CSVTEST_FILE, DEFAULT_CSVTEST_FILE)); - S3ATestUtils.useCSVDataEndpoint(conf); FileSystem fs = FileSystem.newInstance(testFile.toUri(), conf); assertNotNull(fs); assertTrue(fs instanceof S3AFileSystem); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputArray.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputArray.java index 74cad00..87f676c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputArray.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputArray.java @@ -24,9 +24,12 @@ import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.io.IOUtils; +import org.junit.BeforeClass; import org.junit.Test; import java.io.IOException; +import java.io.InputStream; +import java.net.URI; import static org.apache.hadoop.fs.s3a.Constants.*; @@ -38,6 +41,14 @@ * multipart tests are kept in scale tests. 
*/ public class ITestS3ABlockOutputArray extends AbstractS3ATestBase { + private static final int BLOCK_SIZE = 256 * 1024; + + private static byte[] dataset; + + @BeforeClass + public static void setupDataset() { + dataset = ContractTestUtils.dataset(BLOCK_SIZE, 0, 256); + } @Override protected Configuration createConfiguration() { @@ -65,9 +76,9 @@ public void testRegularUpload() throws IOException { } @Test(expected = IOException.class) - public void testDoubleStreamClose() throws Throwable { - Path dest = path("testDoubleStreamClose"); - describe(" testDoubleStreamClose"); + public void testWriteAfterStreamClose() throws Throwable { + Path dest = path("testWriteAfterStreamClose"); + describe(" testWriteAfterStreamClose"); FSDataOutputStream stream = getFileSystem().create(dest, true); byte[] data = ContractTestUtils.dataset(16, 'a', 26); try { @@ -79,7 +90,25 @@ public void testDoubleStreamClose() throws Throwable { } } - public void verifyUpload(String name, int fileSize) throws IOException { + @Test + public void testBlocksClosed() throws Throwable { + Path dest = path("testBlocksClosed"); + describe(" testBlocksClosed"); + FSDataOutputStream stream = getFileSystem().create(dest, true); + S3AInstrumentation.OutputStreamStatistics statistics + = S3ATestUtils.getOutputStreamStatistics(stream); + byte[] data = ContractTestUtils.dataset(16, 'a', 26); + stream.write(data); + LOG.info("closing output stream"); + stream.close(); + assertEquals("total allocated blocks in " + statistics, + 1, statistics.blocksAllocated()); + assertEquals("actively allocated blocks in " + statistics, + 0, statistics.blocksActivelyAllocated()); + LOG.info("end of test case"); + } + + private void verifyUpload(String name, int fileSize) throws IOException { Path dest = path(name); describe(name + " upload to " + dest); ContractTestUtils.createAndVerifyFile( @@ -87,4 +116,43 @@ public void verifyUpload(String name, int fileSize) throws IOException { dest, fileSize); } + + /** + * Create a factory for use in mark/reset tests. 
+ * @param fileSystem source FS + * @return the factory + */ + protected S3ADataBlocks.BlockFactory createFactory(S3AFileSystem fileSystem) { + return new S3ADataBlocks.ArrayBlockFactory(fileSystem); + } + + private void markAndResetDatablock(S3ADataBlocks.BlockFactory factory) + throws Exception { + S3AInstrumentation instrumentation = + new S3AInstrumentation(new URI("s3a://example")); + S3AInstrumentation.OutputStreamStatistics outstats + = instrumentation.newOutputStreamStatistics(null); + S3ADataBlocks.DataBlock block = factory.create(1, BLOCK_SIZE, outstats); + block.write(dataset, 0, dataset.length); + S3ADataBlocks.BlockUploadData uploadData = block.startUpload(); + InputStream stream = uploadData.getUploadStream(); + assertNotNull(stream); + assertTrue("Mark not supported in " + stream, stream.markSupported()); + assertEquals(0, stream.read()); + stream.mark(BLOCK_SIZE); + // read a lot + long l = 0; + while (stream.read() != -1) { + // do nothing + l++; + } + stream.reset(); + assertEquals(1, stream.read()); + } + + @Test + public void testMarkReset() throws Throwable { + markAndResetDatablock(createFactory(getFileSystem())); + } + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputByteBuffer.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputByteBuffer.java index 504426b..02f3de0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputByteBuffer.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputByteBuffer.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.fs.s3a; - /** * Use {@link Constants#FAST_UPLOAD_BYTEBUFFER} for buffering. */ @@ -27,4 +26,8 @@ protected String getBlockOutputBufferName() { return Constants.FAST_UPLOAD_BYTEBUFFER; } + protected S3ADataBlocks.BlockFactory createFactory(S3AFileSystem fileSystem) { + return new S3ADataBlocks.ByteBufferBlockFactory(fileSystem); + } + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputDisk.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputDisk.java index 550706d..abe8656 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputDisk.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABlockOutputDisk.java @@ -18,6 +18,8 @@ package org.apache.hadoop.fs.s3a; +import org.junit.Assume; + /** * Use {@link Constants#FAST_UPLOAD_BUFFER_DISK} for buffering. */ @@ -27,4 +29,14 @@ protected String getBlockOutputBufferName() { return Constants.FAST_UPLOAD_BUFFER_DISK; } + /** + * The disk stream doesn't support mark/reset; calls + * {@code Assume} to skip the test. 
+ * @param fileSystem source FS + * @return null + */ + protected S3ADataBlocks.BlockFactory createFactory(S3AFileSystem fileSystem) { + Assume.assumeTrue("mark/reset nopt supoprted", false); + return null; + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index baf0f79..6625dc2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -35,6 +35,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.hadoop.fs.s3a.S3ATestConstants.TEST_FS_S3A_NAME; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotEquals; import static org.junit.Assert.assertNotNull; @@ -45,6 +46,7 @@ import java.io.File; import java.net.URI; import java.security.PrivilegedExceptionAction; +import java.util.Collection; import org.apache.hadoop.security.ProviderUtils; import org.apache.hadoop.security.UserGroupInformation; @@ -54,6 +56,10 @@ import org.apache.http.HttpStatus; import org.junit.rules.TemporaryFolder; +import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.apache.hadoop.fs.s3a.S3AUtils.*; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; + /** * S3A tests for configuration. */ @@ -535,4 +541,128 @@ public S3AFileSystem run() throws Exception{ fieldType.isAssignableFrom(obj.getClass())); return fieldType.cast(obj); } + + @Test + public void testBucketConfigurationPropagation() throws Throwable { + Configuration config = new Configuration(false); + setBucketOption(config, "b", "base", "1024"); + String basekey = "fs.s3a.base"; + assertOptionEquals(config, basekey, null); + String bucketKey = "fs.s3a.bucket.b.base"; + assertOptionEquals(config, bucketKey, "1024"); + Configuration updated = propagateBucketOptions(config, "b"); + assertOptionEquals(updated, basekey, "1024"); + // original conf is not updated + assertOptionEquals(config, basekey, null); + + String[] sources = updated.getPropertySources(basekey); + assertEquals(1, sources.length); + String sourceInfo = sources[0]; + assertTrue("Wrong source " + sourceInfo, sourceInfo.contains(bucketKey)); + } + + @Test + public void testBucketConfigurationPropagationResolution() throws Throwable { + Configuration config = new Configuration(false); + String basekey = "fs.s3a.base"; + String baseref = "fs.s3a.baseref"; + String baseref2 = "fs.s3a.baseref2"; + config.set(basekey, "orig"); + config.set(baseref2, "${fs.s3a.base}"); + setBucketOption(config, "b", basekey, "1024"); + setBucketOption(config, "b", baseref, "${fs.s3a.base}"); + Configuration updated = propagateBucketOptions(config, "b"); + assertOptionEquals(updated, basekey, "1024"); + assertOptionEquals(updated, baseref, "1024"); + assertOptionEquals(updated, baseref2, "1024"); + } + + @Test + public void testMultipleBucketConfigurations() throws Throwable { + Configuration config = new Configuration(false); + setBucketOption(config, "b", USER_AGENT_PREFIX, "UA-b"); + setBucketOption(config, "c", USER_AGENT_PREFIX, "UA-c"); + config.set(USER_AGENT_PREFIX, "UA-orig"); + Configuration updated = propagateBucketOptions(config, "c"); + assertOptionEquals(updated, USER_AGENT_PREFIX, "UA-c"); + } + + @Test + public void testBucketConfigurationSkipsUnmodifiable() throws Throwable { + Configuration config = new Configuration(false); + String 
impl = "fs.s3a.impl"; + config.set(impl, "orig"); + setBucketOption(config, "b", impl, "b"); + String metastoreImpl = "fs.s3a.metadatastore.impl"; + String ddb = "org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore"; + setBucketOption(config, "b", metastoreImpl, ddb); + setBucketOption(config, "b", "impl2", "b2"); + setBucketOption(config, "b", "bucket.b.loop", "b3"); + assertOptionEquals(config, "fs.s3a.bucket.b.impl", "b"); + + Configuration updated = propagateBucketOptions(config, "b"); + assertOptionEquals(updated, impl, "orig"); + assertOptionEquals(updated, "fs.s3a.impl2", "b2"); + assertOptionEquals(updated, metastoreImpl, ddb); + assertOptionEquals(updated, "fs.s3a.bucket.b.loop", null); + } + + @Test + public void testConfOptionPropagationToFS() throws Exception { + Configuration config = new Configuration(); + String testFSName = config.getTrimmed(TEST_FS_S3A_NAME, ""); + String bucket = new URI(testFSName).getHost(); + setBucketOption(config, bucket, "propagation", "propagated"); + fs = S3ATestUtils.createTestFileSystem(config); + Configuration updated = fs.getConf(); + assertOptionEquals(updated, "fs.s3a.propagation", "propagated"); + } + + @Test + public void testSecurityCredentialPropagationNoOverride() throws Exception { + Configuration config = new Configuration(); + config.set(CREDENTIAL_PROVIDER_PATH, "base"); + patchSecurityCredentialProviders(config); + assertOptionEquals(config, CREDENTIAL_PROVIDER_PATH, + "base"); + } + + @Test + public void testSecurityCredentialPropagationOverrideNoBase() + throws Exception { + Configuration config = new Configuration(); + config.unset(CREDENTIAL_PROVIDER_PATH); + config.set(S3A_SECURITY_CREDENTIAL_PROVIDER_PATH, "override"); + patchSecurityCredentialProviders(config); + assertOptionEquals(config, CREDENTIAL_PROVIDER_PATH, + "override"); + } + + @Test + public void testSecurityCredentialPropagationOverride() throws Exception { + Configuration config = new Configuration(); + config.set(CREDENTIAL_PROVIDER_PATH, "base"); + config.set(S3A_SECURITY_CREDENTIAL_PROVIDER_PATH, "override"); + patchSecurityCredentialProviders(config); + assertOptionEquals(config, CREDENTIAL_PROVIDER_PATH, + "override,base"); + Collection all = config.getStringCollection( + CREDENTIAL_PROVIDER_PATH); + assertTrue(all.contains("override")); + assertTrue(all.contains("base")); + } + + @Test + public void testSecurityCredentialPropagationEndToEnd() throws Exception { + Configuration config = new Configuration(); + config.set(CREDENTIAL_PROVIDER_PATH, "base"); + setBucketOption(config, "b", S3A_SECURITY_CREDENTIAL_PROVIDER_PATH, + "override"); + Configuration updated = propagateBucketOptions(config, "b"); + + patchSecurityCredentialProviders(updated); + assertOptionEquals(updated, CREDENTIAL_PROVIDER_PATH, + "override,base"); + } + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java index 8c22f47..acbe610 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java @@ -86,18 +86,6 @@ String DEFAULT_CSVTEST_FILE = "s3a://landsat-pds/scene_list.gz"; /** - * Endpoint for the S3 CSV/scale tests. This defaults to - * being us-east. - */ - String KEY_CSVTEST_ENDPOINT = S3A_SCALE_TEST + "csvfile.endpoint"; - - /** - * Endpoint for the S3 CSV/scale tests. This defaults to - * being us-east. 
- */ - String DEFAULT_CSVTEST_ENDPOINT = "s3.amazonaws.com"; - - /** * Name of the property to define the timeout for scale tests: {@value}. * Measured in seconds. */ diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index 462914c..9528967 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -20,12 +20,14 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.s3a.scale.S3AScaleTestBase; import org.junit.Assert; +import org.junit.Assume; import org.junit.internal.AssumptionViolatedException; import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.net.URI; @@ -34,11 +36,14 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; import static org.apache.hadoop.fs.s3a.S3ATestConstants.*; import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.junit.Assert.*; /** * Utilities for the S3A tests. */ public final class S3ATestUtils { + private static final Logger LOG = LoggerFactory.getLogger( + S3ATestUtils.class); /** * Value to set a system property to (in maven) to declare that @@ -137,20 +142,6 @@ public static FileContext createTestFileContext(Configuration conf) } /** - * patch the endpoint option so that irrespective of where other tests - * are working, the IO performance tests can work with the landsat - * images. - * @param conf configuration to patch - */ - public static void useCSVDataEndpoint(Configuration conf) { - String endpoint = conf.getTrimmed(S3AScaleTestBase.KEY_CSVTEST_ENDPOINT, - S3AScaleTestBase.DEFAULT_CSVTEST_ENDPOINT); - if (!endpoint.isEmpty()) { - conf.set(ENDPOINT, endpoint); - } - } - - /** * Get a long test property. *
    *
  1. Look up configuration value (which can pick up core-default.xml), @@ -511,4 +502,59 @@ public static void assertInstanceOf(Class expectedClass, Object obj) { */ private S3ATestUtils() { } + + /** + * Set a bucket specific property to a particular value. + * If the generic key passed in has an {@code fs.s3a. prefix}, + * that's stripped off, so that when the the bucket properties are propagated + * down to the generic values, that value gets copied down. + * @param conf configuration to set + * @param bucket bucket name + * @param genericKey key; can start with "fs.s3a." + * @param value value to set + */ + public static void setBucketOption(Configuration conf, String bucket, + String genericKey, String value) { + final String baseKey = genericKey.startsWith(FS_S3A_PREFIX) ? + genericKey.substring(FS_S3A_PREFIX.length()) + : genericKey; + conf.set(FS_S3A_BUCKET_PREFIX + bucket + '.' + baseKey, value); + } + + /** + * Assert that a configuration option matches the expected value. + * @param conf configuration + * @param key option key + * @param expected expected value + */ + public static void assertOptionEquals(Configuration conf, + String key, + String expected) { + assertEquals("Value of " + key, expected, conf.get(key)); + } + + /** + * Assume that a condition is met. If not: log at WARN and + * then throw an {@link AssumptionViolatedException}. + * @param message + * @param condition + */ + public static void assume(String message, boolean condition) { + if (!condition) { + LOG.warn(message); + } + Assume.assumeTrue(message, condition); + } + + /** + * Get the statistics from a wrapped block output stream. + * @param out output stream + * @return the (active) stats of the write + */ + public static S3AInstrumentation.OutputStreamStatistics + getOutputStreamStatistics(FSDataOutputStream out) { + S3ABlockOutputStream blockOutputStream + = (S3ABlockOutputStream) out.getWrappedStream(); + return blockOutputStream.getStatistics(); + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestDataBlocks.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestDataBlocks.java index 9fa95fd..700ef5c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestDataBlocks.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestDataBlocks.java @@ -51,9 +51,8 @@ public void testByteBufferIO() throws Throwable { new S3ADataBlocks.ByteBufferBlockFactory(null)) { int limit = 128; S3ADataBlocks.ByteBufferBlockFactory.ByteBufferBlock block - = factory.create(limit); - assertEquals("outstanding buffers in " + factory, - 1, factory.getOutstandingBufferCount()); + = factory.create(1, limit, null); + assertOutstandingBuffers(factory, 1); byte[] buffer = ContractTestUtils.toAsciiByteArray("test data"); int bufferLen = buffer.length; @@ -66,24 +65,23 @@ public void testByteBufferIO() throws Throwable { block.hasCapacity(limit - bufferLen)); // now start the write - S3ADataBlocks.ByteBufferBlockFactory.ByteBufferInputStream - stream = block.startUpload(); + S3ADataBlocks.BlockUploadData blockUploadData = block.startUpload(); + S3ADataBlocks.ByteBufferBlockFactory.ByteBufferBlock.ByteBufferInputStream + stream = + (S3ADataBlocks.ByteBufferBlockFactory.ByteBufferBlock.ByteBufferInputStream) + blockUploadData.getUploadStream(); + assertTrue("Mark not supported in " + stream, stream.markSupported()); assertTrue("!hasRemaining() in " + stream, stream.hasRemaining()); int expected = bufferLen; assertEquals("wrong available() in " 
+ stream, expected, stream.available()); assertEquals('t', stream.read()); + stream.mark(limit); expected--; assertEquals("wrong available() in " + stream, expected, stream.available()); - // close the block. The buffer must remain outstanding here; - // the stream manages the lifecycle of it now - block.close(); - assertEquals("outstanding buffers in " + factory, - 1, factory.getOutstandingBufferCount()); - block.close(); // read into a byte array with an offset int offset = 5; @@ -109,16 +107,31 @@ public void testByteBufferIO() throws Throwable { 0, stream.available()); assertTrue("hasRemaining() in " + stream, !stream.hasRemaining()); + // go the mark point + stream.reset(); + assertEquals('e', stream.read()); + // when the stream is closed, the data should be returned stream.close(); - assertEquals("outstanding buffers in " + factory, - 0, factory.getOutstandingBufferCount()); + assertOutstandingBuffers(factory, 1); + block.close(); + assertOutstandingBuffers(factory, 0); stream.close(); - assertEquals("outstanding buffers in " + factory, - 0, factory.getOutstandingBufferCount()); - + assertOutstandingBuffers(factory, 0); } } + /** + * Assert the number of buffers active for a block factory. + * @param factory factory + * @param expectedCount expected count. + */ + private static void assertOutstandingBuffers( + S3ADataBlocks.ByteBufferBlockFactory factory, + int expectedCount) { + assertEquals("outstanding buffers in " + factory, + expectedCount, factory.getOutstandingBufferCount()); + } + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java index fcb6444..89fae82 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java @@ -34,11 +34,13 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.StorageStatistics; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.S3AFileStatus; import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.S3AInstrumentation; import org.apache.hadoop.fs.s3a.Statistic; import org.apache.hadoop.util.Progressable; @@ -159,13 +161,20 @@ public void test_010_CreateHugeFile() throws IOException { Statistic putBytesPending = Statistic.OBJECT_PUT_BYTES_PENDING; ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); - + S3AInstrumentation.OutputStreamStatistics streamStatistics; long blocksPer10MB = blocksPerMB * 10; ProgressCallback progress = new ProgressCallback(timer); try (FSDataOutputStream out = fs.create(hugefile, true, uploadBlockSize, progress)) { + try { + streamStatistics = getOutputStreamStatistics(out); + } catch (ClassCastException e) { + LOG.info("Wrapped output stream is not block stream: {}", + out.getWrappedStream()); + streamStatistics = null; + } for (long block = 1; block <= blocks; block++) { out.write(data); @@ -190,7 +199,8 @@ public void test_010_CreateHugeFile() throws IOException { } } // now close the file - LOG.info("Closing file and completing write operation"); + LOG.info("Closing stream {}", out); + LOG.info("Statistics : {}", streamStatistics); 
ContractTestUtils.NanoTimer closeTimer = new ContractTestUtils.NanoTimer(); out.close(); @@ -201,6 +211,7 @@ public void test_010_CreateHugeFile() throws IOException { filesizeMB, uploadBlockSize); logFSState(); bandwidth(timer, filesize); + LOG.info("Statistics after stream closed: {}", streamStatistics); long putRequestCount = storageStatistics.getLong(putRequests); Long putByteCount = storageStatistics.getLong(putBytes); LOG.info("PUT {} bytes in {} operations; {} MB/operation", @@ -214,7 +225,14 @@ public void test_010_CreateHugeFile() throws IOException { S3AFileStatus status = fs.getFileStatus(hugefile); ContractTestUtils.assertIsFile(hugefile, status); assertEquals("File size in " + status, filesize, status.getLen()); - progress.verifyNoFailures("Put file " + hugefile + " of size " + filesize); + if (progress != null) { + progress.verifyNoFailures("Put file " + hugefile + + " of size " + filesize); + } + if (streamStatistics != null) { + assertEquals("actively allocated blocks in " + streamStatistics, + 0, streamStatistics.blocksActivelyAllocated()); + } } /** @@ -285,7 +303,9 @@ private void verifyNoFailures(String operation) { void assumeHugeFileExists() throws IOException { S3AFileSystem fs = getFileSystem(); ContractTestUtils.assertPathExists(fs, "huge file not created", hugefile); - ContractTestUtils.assertIsFile(fs, hugefile); + FileStatus status = fs.getFileStatus(hugefile); + ContractTestUtils.assertIsFile(hugefile, status); + assertTrue("File " + hugefile + " is empty", status.getLen() > 0); } private void logFSState() { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AConcurrentOps.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AConcurrentOps.java new file mode 100644 index 0000000..b4d3862 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AConcurrentOps.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.scale; + +import java.io.IOException; + +import java.net.URI; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.atomic.AtomicInteger; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.contract.ContractTestUtils.NanoTimer; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.S3ATestUtils; + +import org.junit.After; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.fs.s3a.Constants.*; + +/** + * Tests concurrent operations on a single S3AFileSystem instance. + */ +public class ITestS3AConcurrentOps extends S3AScaleTestBase { + private static final Logger LOG = LoggerFactory.getLogger( + ITestS3AConcurrentOps.class); + private final int concurrentRenames = 10; + private Path testRoot; + private Path[] source = new Path[concurrentRenames]; + private Path[] target = new Path[concurrentRenames]; + private S3AFileSystem fs; + private S3AFileSystem auxFs; + + @Override + protected int getTestTimeoutSeconds() { + return 16 * 60; + } + + @Override + public void setup() throws Exception { + super.setup(); + fs = getRestrictedFileSystem(); + auxFs = getNormalFileSystem(); + + testRoot = path("/ITestS3AConcurrentOps"); + testRoot = S3ATestUtils.createTestPath(testRoot); + + for (int i = 0; i < concurrentRenames; i++){ + source[i] = new Path(testRoot, "source" + i); + target[i] = new Path(testRoot, "target" + i); + } + + LOG.info("Generating data..."); + auxFs.mkdirs(testRoot); + byte[] zeroes = ContractTestUtils.dataset(1024*1024, 0, Integer.MAX_VALUE); + for (Path aSource : source) { + try(FSDataOutputStream out = auxFs.create(aSource)) { + for (int mb = 0; mb < 20; mb++) { + LOG.debug("{}: Block {}...", aSource, mb); + out.write(zeroes); + } + } + } + LOG.info("Data generated..."); + } + + private S3AFileSystem getRestrictedFileSystem() throws Exception { + Configuration conf = getConfiguration(); + conf.setInt(MAX_THREADS, 2); + conf.setInt(MAX_TOTAL_TASKS, 1); + + conf.set(MIN_MULTIPART_THRESHOLD, "10M"); + conf.set(MULTIPART_SIZE, "5M"); + + S3AFileSystem s3a = getFileSystem(); + URI rootURI = new URI(conf.get(TEST_FS_S3A_NAME)); + s3a.initialize(rootURI, conf); + return s3a; + } + + private S3AFileSystem getNormalFileSystem() throws Exception { + S3AFileSystem s3a = new S3AFileSystem(); + Configuration conf = new Configuration(); + URI rootURI = new URI(conf.get(TEST_FS_S3A_NAME)); + s3a.initialize(rootURI, conf); + return s3a; + } + + @After + public void teardown() throws Exception { + if (auxFs != null) { + auxFs.delete(testRoot, true); + } + } + + /** + * Attempts to trigger a deadlock that would happen if any bounded resource + * pool became saturated with control tasks that depended on other tasks + * that now can't enter the resource pool to get completed. 
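+   * A sketch of the executor split this test exercises (mirroring the
+   * S3AFileSystem changes elsewhere in this patch; not code from this class):
+   * <pre>{@code
+   *   // transfer-manager copies run on the unbounded daemon pool...
+   *   transfers = new TransferManager(s3, unboundedThreadPool);
+   *   // ...while block uploads are throttled over the bounded pool
+   *   executor = new SemaphoredDelegatingExecutor(boundedThreadPool,
+   *       blockOutputActiveBlocks, true);
+   * }</pre>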
+ */ + @Test + @SuppressWarnings("unchecked") + public void testParallelRename() throws InterruptedException, + ExecutionException, IOException { + ExecutorService executor = Executors.newFixedThreadPool( + concurrentRenames, new ThreadFactory() { + private AtomicInteger count = new AtomicInteger(0); + + public Thread newThread(Runnable r) { + return new Thread(r, + "testParallelRename" + count.getAndIncrement()); + } + }); + ((ThreadPoolExecutor)executor).prestartAllCoreThreads(); + Future[] futures = new Future[concurrentRenames]; + for (int i = 0; i < concurrentRenames; i++) { + final int index = i; + futures[i] = executor.submit(new Callable() { + @Override + public Boolean call() throws Exception { + NanoTimer timer = new NanoTimer(); + boolean result = fs.rename(source[index], target[index]); + timer.end("parallel rename %d", index); + LOG.info("Rename {} ran from {} to {}", index, + timer.getStartTime(), timer.getEndTime()); + return result; + } + }); + } + LOG.info("Waiting for tasks to complete..."); + LOG.info("Deadlock may have occurred if nothing else is logged" + + " or the test times out"); + for (int i = 0; i < concurrentRenames; i++) { + assertTrue("No future " + i, futures[i].get()); + assertPathExists("target path", target[i]); + assertPathDoesNotExist("source path", source[i]); + } + LOG.info("All tasks have completed successfully"); + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java index e36d086..236ffcd 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AInputStreamPerformance.java @@ -28,7 +28,6 @@ import org.apache.hadoop.fs.s3a.S3AInputPolicy; import org.apache.hadoop.fs.s3a.S3AInputStream; import org.apache.hadoop.fs.s3a.S3AInstrumentation; -import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.CompressionCodec; @@ -36,7 +35,6 @@ import org.apache.hadoop.util.LineReader; import org.junit.After; import org.junit.Assert; -import org.junit.Assume; import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; @@ -47,6 +45,7 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.*; import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume; /** * Look at the performance of S3a operations. @@ -79,10 +78,11 @@ public void openFS() throws IOException { String testFile = conf.getTrimmed(KEY_CSVTEST_FILE, DEFAULT_CSVTEST_FILE); if (testFile.isEmpty()) { assumptionMessage = "Empty test property: " + KEY_CSVTEST_FILE; + LOG.warn(assumptionMessage); testDataAvailable = false; } else { - S3ATestUtils.useCSVDataEndpoint(conf); testData = new Path(testFile); + LOG.info("Using {} as input stream source", testData); Path path = this.testData; bindS3aFS(path); try { @@ -113,7 +113,7 @@ public void cleanup() { * Declare that the test requires the CSV test dataset. 
*/ private void requireCSVTestData() { - Assume.assumeTrue(assumptionMessage, testDataAvailable); + assume(assumptionMessage, testDataAvailable); } /** @@ -146,7 +146,7 @@ FSDataInputStream openTestFile(S3AInputPolicy inputPolicy, long readahead) /** * Open a test file with the read buffer specified in the setting - * {@link #KEY_READ_BUFFER_SIZE}. + * {@link org.apache.hadoop.fs.s3a.S3ATestConstants#KEY_READ_BUFFER_SIZE}. * * @param path path to open * @param inputPolicy input policy to use diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java index 9da621f..0f844b1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java @@ -28,7 +28,6 @@ import org.apache.hadoop.fs.s3a.Statistic; import org.apache.hadoop.metrics2.lib.MutableGaugeLong; -import org.junit.Assume; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -91,14 +90,13 @@ public void setup() throws Exception { super.setup(); testPath = path("/tests3ascale"); LOG.debug("Scale test operation count = {}", getOperationCount()); - // multipart purges are disabled on the scale tests - // check for the test being enabled enabled = getTestPropertyBool( getConf(), KEY_SCALE_TESTS_ENABLED, DEFAULT_SCALE_TESTS_ENABLED); - Assume.assumeTrue("Scale test disabled: to enable set property " + - KEY_SCALE_TESTS_ENABLED, isEnabled()); + assume("Scale test disabled: to enable set property " + + KEY_SCALE_TESTS_ENABLED, + isEnabled()); } /** diff --git a/hadoop-tools/hadoop-azure-datalake/pom.xml b/hadoop-tools/hadoop-azure-datalake/pom.xml new file mode 100644 index 0000000..a8c3b16 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/pom.xml @@ -0,0 +1,160 @@ + + + + 4.0.0 + + org.apache.hadoop + hadoop-project + 2.8.0 + ../../hadoop-project + + org.apache.hadoop + hadoop-azure-datalake + Apache Hadoop Azure Data Lake support + + This module contains code to support integration with Azure Data Lake. 
+ + jar + + 2.4.0 + 0.9.1 + UTF-8 + true + + + + + org.apache.maven.plugins + maven-project-info-reports-plugin + + + false + false + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + deplist + compile + + list + + + + ${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-optional.txt + + + + + + + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.apache.maven.plugins + + maven-enforcer-plugin + + [1.0.0,) + + enforce + + + + + + + + + + + + + + + + + + + com.microsoft.azure + azure-data-lake-store-sdk + 2.1.4 + + + + org.apache.hadoop + hadoop-common + + + com.squareup.okhttp + okhttp + 2.4.0 + + + junit + junit + test + + + com.eclipsesource.minimal-json + minimal-json + 0.9.1 + test + + + org.apache.hadoop + hadoop-common + test + test-jar + + + com.squareup.okhttp + mockwebserver + 2.4.0 + test + + + diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/Adl.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/Adl.java new file mode 100644 index 0000000..7ec04cf --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/Adl.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.DelegateToFileSystem; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +/** + * Expose adl:// scheme to access ADL file system. 
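+ * A sketch of how the binding is typically wired up and used through the
+ * {@code FileContext} API (the account name and binding key below are
+ * illustrative, not taken from this patch):
+ * <pre>{@code
+ *   // core-site.xml (hypothetical):
+ *   //   fs.AbstractFileSystem.adl.impl = org.apache.hadoop.fs.adl.Adl
+ *   Configuration conf = new Configuration();
+ *   FileContext fc = FileContext.getFileContext(
+ *       URI.create("adl://example.azuredatalakestore.net"), conf);
+ *   FileStatus[] listing = fc.util().listStatus(new Path("/"));
+ * }</pre>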
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class Adl extends DelegateToFileSystem { + + Adl(URI theUri, Configuration conf) throws IOException, URISyntaxException { + super(theUri, createDataLakeFileSystem(conf), conf, AdlFileSystem.SCHEME, + false); + } + + private static AdlFileSystem createDataLakeFileSystem(Configuration conf) { + AdlFileSystem fs = new AdlFileSystem(); + fs.setConf(conf); + return fs; + } + + /** + * @return Default port for ADL File system to communicate + */ + @Override + public final int getUriDefaultPort() { + return AdlFileSystem.DEFAULT_PORT; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java new file mode 100644 index 0000000..8fc8e00 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Constants. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class AdlConfKeys { + // OAuth2 Common Configuration + public static final String AZURE_AD_REFRESH_URL_KEY = "dfs.adls.oauth2" + + ".refresh.url"; + + // optional when provider type is refresh or client id. 
+ public static final String AZURE_AD_TOKEN_PROVIDER_CLASS_KEY = + "dfs.adls.oauth2.access.token.provider"; + public static final String AZURE_AD_CLIENT_ID_KEY = + "dfs.adls.oauth2.client.id"; + public static final String AZURE_AD_TOKEN_PROVIDER_TYPE_KEY = + "dfs.adls.oauth2.access.token.provider.type"; + + // OAuth Refresh Token Configuration + public static final String AZURE_AD_REFRESH_TOKEN_KEY = + "dfs.adls.oauth2.refresh.token"; + + public static final String TOKEN_PROVIDER_TYPE_REFRESH_TOKEN = "RefreshToken"; + // OAuth Client Cred Token Configuration + public static final String AZURE_AD_CLIENT_SECRET_KEY = + "dfs.adls.oauth2.credential"; + public static final String TOKEN_PROVIDER_TYPE_CLIENT_CRED = + "ClientCredential"; + + public static final String READ_AHEAD_BUFFER_SIZE_KEY = + "adl.feature.client.cache.readahead"; + + public static final String WRITE_BUFFER_SIZE_KEY = + "adl.feature.client.cache.drop.behind.writes"; + static final String SECURE_TRANSPORT_SCHEME = "https"; + static final String INSECURE_TRANSPORT_SCHEME = "http"; + static final String ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER = + "adl.debug.override.localuserasfileowner"; + + static final boolean ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT = false; + static final long ADL_BLOCK_SIZE = 256 * 1024 * 1024; + static final int ADL_REPLICATION_FACTOR = 1; + static final String ADL_HADOOP_CLIENT_NAME = "hadoop-azure-datalake-"; + static final String ADL_HADOOP_CLIENT_VERSION = + "2.0.0-SNAPSHOT"; + static final String ADL_EVENTS_TRACKING_CLUSTERNAME = + "adl.events.tracking.clustername"; + + static final String ADL_EVENTS_TRACKING_CLUSTERTYPE = + "adl.events.tracking.clustertype"; + static final int DEFAULT_READ_AHEAD_BUFFER_SIZE = 4 * 1024 * 1024; + static final int DEFAULT_WRITE_AHEAD_BUFFER_SIZE = 4 * 1024 * 1024; + + static final String LATENCY_TRACKER_KEY = + "adl.dfs.enable.client.latency.tracker"; + static final boolean LATENCY_TRACKER_DEFAULT = true; + + static final String ADL_EXPERIMENT_POSITIONAL_READ_KEY = + "adl.feature.experiment.positional.read.enable"; + static final boolean ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT = true; + + static final String ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION = + "adl.feature.support.acl.bit"; + static final boolean ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION_DEFAULT = true; + + static final String ADL_ENABLEUPN_FOR_OWNERGROUP_KEY = + "adl.feature.ownerandgroup.enableupn"; + static final boolean ADL_ENABLEUPN_FOR_OWNERGROUP_DEFAULT = false; + + private AdlConfKeys() { + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java new file mode 100644 index 0000000..e0e273e --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java @@ -0,0 +1,981 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.List; + +import com.google.common.annotations.VisibleForTesting; +import com.microsoft.azure.datalake.store.ADLStoreClient; +import com.microsoft.azure.datalake.store.ADLStoreOptions; +import com.microsoft.azure.datalake.store.DirectoryEntry; +import com.microsoft.azure.datalake.store.DirectoryEntryType; +import com.microsoft.azure.datalake.store.IfExists; +import com.microsoft.azure.datalake.store.LatencyTracker; +import com.microsoft.azure.datalake.store.UserGroupRepresentation; +import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider; +import com.microsoft.azure.datalake.store.oauth2.ClientCredsTokenProvider; +import com.microsoft.azure.datalake.store.oauth2.RefreshTokenBasedTokenProvider; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.ContentSummary.Builder; +import org.apache.hadoop.fs.CreateFlag; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.InvalidPathException; +import org.apache.hadoop.fs.Options; +import org.apache.hadoop.fs.Options.Rename; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.ProviderUtils; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.VersionInfo; + +import static org.apache.hadoop.fs.adl.AdlConfKeys.*; + +/** + * A FileSystem to access Azure Data Lake Store. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class AdlFileSystem extends FileSystem { + public static final String SCHEME = "adl"; + static final int DEFAULT_PORT = 443; + private URI uri; + private String userName; + private boolean overrideOwner; + private ADLStoreClient adlClient; + private Path workingDirectory; + private boolean aclBitStatus; + private UserGroupRepresentation oidOrUpn; + + + // retained for tests + private AccessTokenProvider tokenProvider; + private AzureADTokenProvider azureTokenProvider; + + @Override + public String getScheme() { + return SCHEME; + } + + public URI getUri() { + return uri; + } + + @Override + public int getDefaultPort() { + return DEFAULT_PORT; + } + + @Override + public boolean supportsSymlinks() { + return false; + } + + /** + * Called after a new FileSystem instance is constructed. + * + * @param storeUri a uri whose authority section names the host, port, etc. + * for this FileSystem + * @param conf the configuration + */ + @Override + public void initialize(URI storeUri, Configuration conf) throws IOException { + super.initialize(storeUri, conf); + this.setConf(conf); + this.uri = URI + .create(storeUri.getScheme() + "://" + storeUri.getAuthority()); + + try { + userName = UserGroupInformation.getCurrentUser().getShortUserName(); + } catch (IOException e) { + userName = "hadoop"; + } + + this.setWorkingDirectory(getHomeDirectory()); + + overrideOwner = getConf().getBoolean(ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER, + ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT); + + aclBitStatus = conf.getBoolean(ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION, + ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION_DEFAULT); + + String accountFQDN = null; + String mountPoint = null; + String hostname = storeUri.getHost(); + if (!hostname.contains(".") && !hostname.equalsIgnoreCase( + "localhost")) { // this is a symbolic name. Resolve it. + String hostNameProperty = "dfs.adls." + hostname + ".hostname"; + String mountPointProperty = "dfs.adls." + hostname + ".mountpoint"; + accountFQDN = getNonEmptyVal(conf, hostNameProperty); + mountPoint = getNonEmptyVal(conf, mountPointProperty); + } else { + accountFQDN = hostname; + } + + if (storeUri.getPort() > 0) { + accountFQDN = accountFQDN + ":" + storeUri.getPort(); + } + + adlClient = ADLStoreClient + .createClient(accountFQDN, getAccessTokenProvider(conf)); + + ADLStoreOptions options = new ADLStoreOptions(); + options.enableThrowingRemoteExceptions(); + + if (getTransportScheme().equalsIgnoreCase(INSECURE_TRANSPORT_SCHEME)) { + options.setInsecureTransport(); + } + + if (mountPoint != null) { + options.setFilePathPrefix(mountPoint); + } + + String clusterName = conf.get(ADL_EVENTS_TRACKING_CLUSTERNAME, "UNKNOWN"); + String clusterType = conf.get(ADL_EVENTS_TRACKING_CLUSTERTYPE, "UNKNOWN"); + + String clientVersion = ADL_HADOOP_CLIENT_NAME + (StringUtils + .isEmpty(VersionInfo.getVersion().trim()) ? + ADL_HADOOP_CLIENT_VERSION.trim() : + VersionInfo.getVersion().trim()); + options.setUserAgentSuffix(clientVersion + "/" + + VersionInfo.getVersion().trim() + "/" + clusterName + "/" + + clusterType); + + adlClient.setOptions(options); + + boolean trackLatency = conf + .getBoolean(LATENCY_TRACKER_KEY, LATENCY_TRACKER_DEFAULT); + if (!trackLatency) { + LatencyTracker.disable(); + } + + boolean enableUPN = conf.getBoolean(ADL_ENABLEUPN_FOR_OWNERGROUP_KEY, + ADL_ENABLEUPN_FOR_OWNERGROUP_DEFAULT); + oidOrUpn = enableUPN ? 
UserGroupRepresentation.UPN : + UserGroupRepresentation.OID; + } + + /** + * This method is provided for convenience for derived classes to define + * custom {@link AzureADTokenProvider} instance. + * + * In order to ensure secure hadoop infrastructure and user context for which + * respective {@link AdlFileSystem} instance is initialized, + * Loading {@link AzureADTokenProvider} is not sufficient. + * + * The order of loading {@link AzureADTokenProvider} is to first invoke + * {@link #getCustomAccessTokenProvider(Configuration)}, If method return null + * which means no implementation provided by derived classes, then + * configuration object is loaded to retrieve token configuration as specified + * is documentation. + * + * Custom token management takes the higher precedence during initialization. + * + * @param conf Configuration object + * @return null if the no custom {@link AzureADTokenProvider} token management + * is specified. + * @throws IOException if failed to initialize token provider. + */ + protected synchronized AzureADTokenProvider getCustomAccessTokenProvider( + Configuration conf) throws IOException { + String className = getNonEmptyVal(conf, AZURE_AD_TOKEN_PROVIDER_CLASS_KEY); + + Class azureADTokenProviderClass = + conf.getClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, null, + AzureADTokenProvider.class); + if (azureADTokenProviderClass == null) { + throw new IllegalArgumentException( + "Configuration " + className + " " + "not defined/accessible."); + } + + azureTokenProvider = ReflectionUtils + .newInstance(azureADTokenProviderClass, conf); + if (azureTokenProvider == null) { + throw new IllegalArgumentException("Failed to initialize " + className); + } + + azureTokenProvider.initialize(conf); + return azureTokenProvider; + } + + private AccessTokenProvider getAccessTokenProvider(Configuration config) + throws IOException { + Configuration conf = ProviderUtils.excludeIncompatibleCredentialProviders( + config, AdlFileSystem.class); + TokenProviderType type = conf.getEnum( + AdlConfKeys.AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, TokenProviderType.Custom); + + switch (type) { + case RefreshToken: + tokenProvider = getConfRefreshTokenBasedTokenProvider(conf); + break; + case ClientCredential: + tokenProvider = getConfCredentialBasedTokenProvider(conf); + break; + case Custom: + default: + AzureADTokenProvider azureADTokenProvider = getCustomAccessTokenProvider( + conf); + tokenProvider = new SdkTokenProviderAdapter(azureADTokenProvider); + break; + } + + return tokenProvider; + } + + private AccessTokenProvider getConfCredentialBasedTokenProvider( + Configuration conf) throws IOException { + String clientId = getPasswordString(conf, AZURE_AD_CLIENT_ID_KEY); + String refreshUrl = getPasswordString(conf, AZURE_AD_REFRESH_URL_KEY); + String clientSecret = getPasswordString(conf, AZURE_AD_CLIENT_SECRET_KEY); + return new ClientCredsTokenProvider(refreshUrl, clientId, clientSecret); + } + + private AccessTokenProvider getConfRefreshTokenBasedTokenProvider( + Configuration conf) throws IOException { + String clientId = getPasswordString(conf, AZURE_AD_CLIENT_ID_KEY); + String refreshToken = getPasswordString(conf, AZURE_AD_REFRESH_TOKEN_KEY); + return new RefreshTokenBasedTokenProvider(clientId, refreshToken); + } + + @VisibleForTesting + AccessTokenProvider getTokenProvider() { + return tokenProvider; + } + + @VisibleForTesting + AzureADTokenProvider getAzureTokenProvider() { + return azureTokenProvider; + } + + /** + * Constructing home directory locally is fine as long as Hadoop + * 
local user name and ADL user name relationship story is not fully baked + * yet. + * + * @return Hadoop local user home directory. + */ + @Override + public Path getHomeDirectory() { + return makeQualified(new Path("/user/" + userName)); + } + + /** + * Create call semantic is handled differently in case of ADL. Create + * semantics is translated to Create/Append + * semantics. + * 1. No dedicated connection to server. + * 2. Buffering is locally done, Once buffer is full or flush is invoked on + * the by the caller. All the pending + * data is pushed to ADL as APPEND operation code. + * 3. On close - Additional call is send to server to close the stream, and + * release lock from the stream. + * + * Necessity of Create/Append semantics is + * 1. ADL backend server does not allow idle connection for longer duration + * . In case of slow writer scenario, + * observed connection timeout/Connection reset causing occasional job + * failures. + * 2. Performance boost to jobs which are slow writer, avoided network latency + * 3. ADL equally better performing with multiple of 4MB chunk as append + * calls. + * + * @param f File path + * @param permission Access permission for the newly created file + * @param overwrite Remove existing file and recreate new one if true + * otherwise throw error if file exist + * @param bufferSize Buffer size, ADL backend does not honour + * @param replication Replication count, ADL backend does not honour + * @param blockSize Block size, ADL backend does not honour + * @param progress Progress indicator + * @return FSDataOutputStream OutputStream on which application can push + * stream of bytes + * @throws IOException when system error, internal server error or user error + */ + @Override + public FSDataOutputStream create(Path f, FsPermission permission, + boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + statistics.incrementWriteOps(1); + IfExists overwriteRule = overwrite ? IfExists.OVERWRITE : IfExists.FAIL; + return new FSDataOutputStream(new AdlFsOutputStream(adlClient + .createFile(toRelativeFilePath(f), overwriteRule, + Integer.toOctalString(applyUMask(permission).toShort()), true), + getConf()), this.statistics); + } + + /** + * Opens an FSDataOutputStream at the indicated Path with write-progress + * reporting. Same as create(), except fails if parent directory doesn't + * already exist. + * + * @param f the file name to open + * @param permission Access permission for the newly created file + * @param flags {@link CreateFlag}s to use for this stream. + * @param bufferSize the size of the buffer to be used. ADL backend does + * not honour + * @param replication required block replication for the file. 
ADL backend + * does not honour + * @param blockSize Block size, ADL backend does not honour + * @param progress Progress indicator + * @throws IOException when system error, internal server error or user error + * @see #setPermission(Path, FsPermission) + * @deprecated API only for 0.20-append + */ + @Override + public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, + EnumSet flags, int bufferSize, short replication, + long blockSize, Progressable progress) throws IOException { + statistics.incrementWriteOps(1); + IfExists overwriteRule = IfExists.FAIL; + for (CreateFlag flag : flags) { + if (flag == CreateFlag.OVERWRITE) { + overwriteRule = IfExists.OVERWRITE; + break; + } + } + + return new FSDataOutputStream(new AdlFsOutputStream(adlClient + .createFile(toRelativeFilePath(f), overwriteRule, + Integer.toOctalString(applyUMask(permission).toShort()), false), + getConf()), this.statistics); + } + + /** + * Append to an existing file (optional operation). + * + * @param f the existing file to be appended. + * @param bufferSize the size of the buffer to be used. ADL backend does + * not honour + * @param progress Progress indicator + * @throws IOException when system error, internal server error or user error + */ + @Override + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + statistics.incrementWriteOps(1); + return new FSDataOutputStream( + new AdlFsOutputStream(adlClient.getAppendStream(toRelativeFilePath(f)), + getConf()), this.statistics); + } + + /** + * Azure data lake does not support user configuration for data replication + * hence not leaving system to query on + * azure data lake. + * + * Stub implementation + * + * @param p Not honoured + * @param replication Not honoured + * @return True hard coded since ADL file system does not support + * replication configuration + * @throws IOException No exception would not thrown in this case however + * aligning with parent api definition. + */ + @Override + public boolean setReplication(final Path p, final short replication) + throws IOException { + statistics.incrementWriteOps(1); + return true; + } + + /** + * Open call semantic is handled differently in case of ADL. Instead of + * network stream is returned to the user, + * Overridden FsInputStream is returned. + * + * @param f File path + * @param buffersize Buffer size, Not honoured + * @return FSDataInputStream InputStream on which application can read + * stream of bytes + * @throws IOException when system error, internal server error or user error + */ + @Override + public FSDataInputStream open(final Path f, final int buffersize) + throws IOException { + statistics.incrementReadOps(1); + return new FSDataInputStream( + new AdlFsInputStream(adlClient.getReadStream(toRelativeFilePath(f)), + statistics, getConf())); + } + + /** + * Return a file status object that represents the path. + * + * @param f The path we want information from + * @return a FileStatus object + * @throws IOException when the path does not exist or any other error; + * IOException see specific implementation + */ + @Override + public FileStatus getFileStatus(final Path f) throws IOException { + statistics.incrementReadOps(1); + DirectoryEntry entry = + adlClient.getDirectoryEntry(toRelativeFilePath(f), oidOrUpn); + return toFileStatus(entry, f); + } + + /** + * List the statuses of the files/directories in the given path if the path is + * a directory. 
+ * + * @param f given path + * @return the statuses of the files/directories in the given patch + * @throws IOException when the path does not exist or any other error; + * IOException see specific implementation + */ + @Override + public FileStatus[] listStatus(final Path f) throws IOException { + statistics.incrementReadOps(1); + List entries = + adlClient.enumerateDirectory(toRelativeFilePath(f), oidOrUpn); + return toFileStatuses(entries, f); + } + + /** + * Renames Path src to Path dst. Can take place on local fs + * or remote DFS. + * + * ADLS support POSIX standard for rename operation. + * + * @param src path to be renamed + * @param dst new path after rename + * @return true if rename is successful + * @throws IOException on failure + */ + @Override + public boolean rename(final Path src, final Path dst) throws IOException { + statistics.incrementWriteOps(1); + if (toRelativeFilePath(src).equals("/")) { + return false; + } + + return adlClient.rename(toRelativeFilePath(src), toRelativeFilePath(dst)); + } + + @Override + @Deprecated + public void rename(final Path src, final Path dst, + final Options.Rename... options) throws IOException { + statistics.incrementWriteOps(1); + boolean overwrite = false; + for (Rename renameOption : options) { + if (renameOption == Rename.OVERWRITE) { + overwrite = true; + break; + } + } + adlClient + .rename(toRelativeFilePath(src), toRelativeFilePath(dst), overwrite); + } + + /** + * Concat existing files together. + * + * @param trg the path to the target destination. + * @param srcs the paths to the sources to use for the concatenation. + * @throws IOException when system error, internal server error or user error + */ + @Override + public void concat(final Path trg, final Path[] srcs) throws IOException { + statistics.incrementWriteOps(1); + List sourcesList = new ArrayList(); + for (Path entry : srcs) { + sourcesList.add(toRelativeFilePath(entry)); + } + adlClient.concatenateFiles(toRelativeFilePath(trg), sourcesList); + } + + /** + * Delete a file. + * + * @param path the path to delete. + * @param recursive if path is a directory and set to + * true, the directory is deleted else throws an exception. + * In case of a file the recursive can be set to either + * true or false. + * @return true if delete is successful else false. + * @throws IOException when system error, internal server error or user error + */ + @Override + public boolean delete(final Path path, final boolean recursive) + throws IOException { + statistics.incrementWriteOps(1); + String relativePath = toRelativeFilePath(path); + // Delete on root directory not supported. + if (relativePath.equals("/")) { + // This is important check after recent commit + // HADOOP-12977 and HADOOP-13716 validates on root for + // 1. if root is empty and non recursive delete then return false. + // 2. if root is non empty and non recursive delete then throw exception. + if (!recursive + && adlClient.enumerateDirectory(toRelativeFilePath(path), 1).size() + > 0) { + throw new IOException("Delete on root is not supported."); + } + return false; + } + + return recursive ? + adlClient.deleteRecursive(relativePath) : + adlClient.delete(relativePath); + } + + /** + * Make the given file and all non-existent parents into + * directories. Has the semantics of Unix 'mkdir -p'. + * Existence of the directory hierarchy is not an error. 
+ * + * @param path path to create + * @param permission to apply to path + */ + @Override + public boolean mkdirs(final Path path, final FsPermission permission) + throws IOException { + statistics.incrementWriteOps(1); + return adlClient.createDirectory(toRelativeFilePath(path), + Integer.toOctalString(applyUMask(permission).toShort())); + } + + private FileStatus[] toFileStatuses(final List entries, + final Path parent) { + FileStatus[] fileStatuses = new FileStatus[entries.size()]; + int index = 0; + for (DirectoryEntry entry : entries) { + FileStatus status = toFileStatus(entry, parent); + if (!(entry.name == null || entry.name == "")) { + status.setPath( + new Path(parent.makeQualified(uri, workingDirectory), entry.name)); + } + + fileStatuses[index++] = status; + } + + return fileStatuses; + } + + private FsPermission applyUMask(FsPermission permission) { + if (permission == null) { + permission = FsPermission.getDefault(); + } + return permission.applyUMask(FsPermission.getUMask(getConf())); + } + + private FileStatus toFileStatus(final DirectoryEntry entry, final Path f) { + boolean isDirectory = entry.type == DirectoryEntryType.DIRECTORY; + long lastModificationData = entry.lastModifiedTime.getTime(); + long lastAccessTime = entry.lastAccessTime.getTime(); + // set aclBit from ADLS backend response if + // ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION is true. + final boolean aclBit = aclBitStatus ? entry.aclBit : false; + + FsPermission permission = new AdlPermission(aclBit, + Short.valueOf(entry.permission, 8)); + String user = entry.user; + String group = entry.group; + + FileStatus status; + if (overrideOwner) { + status = new FileStatus(entry.length, isDirectory, ADL_REPLICATION_FACTOR, + ADL_BLOCK_SIZE, lastModificationData, lastAccessTime, permission, + userName, "hdfs", this.makeQualified(f)); + } else { + status = new FileStatus(entry.length, isDirectory, ADL_REPLICATION_FACTOR, + ADL_BLOCK_SIZE, lastModificationData, lastAccessTime, permission, + user, group, this.makeQualified(f)); + } + + return status; + } + + /** + * Set owner of a path (i.e. a file or a directory). + * The parameters owner and group cannot both be null. + * + * @param path The path + * @param owner If it is null, the original username remains unchanged. + * @param group If it is null, the original groupname remains unchanged. + */ + @Override + public void setOwner(final Path path, final String owner, final String group) + throws IOException { + statistics.incrementWriteOps(1); + adlClient.setOwner(toRelativeFilePath(path), owner, group); + } + + /** + * Set permission of a path. + * + * @param path The path + * @param permission Access permission + */ + @Override + public void setPermission(final Path path, final FsPermission permission) + throws IOException { + statistics.incrementWriteOps(1); + adlClient.setPermission(toRelativeFilePath(path), + Integer.toOctalString(permission.toShort())); + } + + /** + * Modifies ACL entries of files and directories. This method can add new ACL + * entries or modify the permissions on existing ACL entries. All existing + * ACL entries that are not specified in this call are retained without + * changes. (Modifications are merged into the current ACL.) 
+ * + * @param path Path to modify + * @param aclSpec List of AclEntry describing modifications + * @throws IOException if an ACL could not be modified + */ + @Override + public void modifyAclEntries(final Path path, final List aclSpec) + throws IOException { + statistics.incrementWriteOps(1); + List msAclEntries = new + ArrayList(); + for (AclEntry aclEntry : aclSpec) { + msAclEntries.add(com.microsoft.azure.datalake.store.acl.AclEntry + .parseAclEntry(aclEntry.toString())); + } + adlClient.modifyAclEntries(toRelativeFilePath(path), msAclEntries); + } + + /** + * Removes ACL entries from files and directories. Other ACL entries are + * retained. + * + * @param path Path to modify + * @param aclSpec List of AclEntry describing entries to remove + * @throws IOException if an ACL could not be modified + */ + @Override + public void removeAclEntries(final Path path, final List aclSpec) + throws IOException { + statistics.incrementWriteOps(1); + List msAclEntries = new + ArrayList(); + for (AclEntry aclEntry : aclSpec) { + msAclEntries.add(com.microsoft.azure.datalake.store.acl.AclEntry + .parseAclEntry(aclEntry.toString(), true)); + } + adlClient.removeAclEntries(toRelativeFilePath(path), msAclEntries); + } + + /** + * Removes all default ACL entries from files and directories. + * + * @param path Path to modify + * @throws IOException if an ACL could not be modified + */ + @Override + public void removeDefaultAcl(final Path path) throws IOException { + statistics.incrementWriteOps(1); + adlClient.removeDefaultAcls(toRelativeFilePath(path)); + } + + /** + * Removes all but the base ACL entries of files and directories. The entries + * for user, group, and others are retained for compatibility with permission + * bits. + * + * @param path Path to modify + * @throws IOException if an ACL could not be removed + */ + @Override + public void removeAcl(final Path path) throws IOException { + statistics.incrementWriteOps(1); + adlClient.removeAllAcls(toRelativeFilePath(path)); + } + + /** + * Fully replaces ACL of files and directories, discarding all existing + * entries. + * + * @param path Path to modify + * @param aclSpec List of AclEntry describing modifications, must include + * entries for user, group, and others for compatibility with + * permission bits. + * @throws IOException if an ACL could not be modified + */ + @Override + public void setAcl(final Path path, final List aclSpec) + throws IOException { + statistics.incrementWriteOps(1); + List msAclEntries = new + ArrayList(); + for (AclEntry aclEntry : aclSpec) { + msAclEntries.add(com.microsoft.azure.datalake.store.acl.AclEntry + .parseAclEntry(aclEntry.toString())); + } + + adlClient.setAcl(toRelativeFilePath(path), msAclEntries); + } + + /** + * Gets the ACL of a file or directory. 
+ * + * @param path Path to get + * @return AclStatus describing the ACL of the file or directory + * @throws IOException if an ACL could not be read + */ + @Override + public AclStatus getAclStatus(final Path path) throws IOException { + statistics.incrementReadOps(1); + com.microsoft.azure.datalake.store.acl.AclStatus adlStatus = + adlClient.getAclStatus(toRelativeFilePath(path), oidOrUpn); + AclStatus.Builder aclStatusBuilder = new AclStatus.Builder(); + aclStatusBuilder.owner(adlStatus.owner); + aclStatusBuilder.group(adlStatus.group); + aclStatusBuilder.setPermission( + new FsPermission(Short.valueOf(adlStatus.octalPermissions, 8))); + aclStatusBuilder.stickyBit(adlStatus.stickyBit); + String aclListString = com.microsoft.azure.datalake.store.acl.AclEntry + .aclListToString(adlStatus.aclSpec); + List aclEntries = AclEntry.parseAclSpec(aclListString, true); + aclStatusBuilder.addEntries(aclEntries); + return aclStatusBuilder.build(); + } + + /** + * Checks if the user can access a path. The mode specifies which access + * checks to perform. If the requested permissions are granted, then the + * method returns normally. If access is denied, then the method throws an + * {@link AccessControlException}. + * + * @param path Path to check + * @param mode type of access to check + * @throws AccessControlException if access is denied + * @throws java.io.FileNotFoundException if the path does not exist + * @throws IOException see specific implementation + */ + @Override + public void access(final Path path, FsAction mode) throws IOException { + statistics.incrementReadOps(1); + if (!adlClient.checkAccess(toRelativeFilePath(path), mode.SYMBOL)) { + throw new AccessControlException("Access Denied : " + path.toString()); + } + } + + /** + * Return the {@link ContentSummary} of a given {@link Path}. + * + * @param f path to use + */ + @Override + public ContentSummary getContentSummary(Path f) throws IOException { + statistics.incrementReadOps(1); + com.microsoft.azure.datalake.store.ContentSummary msSummary = adlClient + .getContentSummary(toRelativeFilePath(f)); + return new Builder().length(msSummary.length) + .directoryCount(msSummary.directoryCount).fileCount(msSummary.fileCount) + .spaceConsumed(msSummary.spaceConsumed).build(); + } + + @VisibleForTesting + protected String getTransportScheme() { + return SECURE_TRANSPORT_SCHEME; + } + + @VisibleForTesting + String toRelativeFilePath(Path path) { + return path.makeQualified(uri, workingDirectory).toUri().getPath(); + } + + /** + * Get the current working directory for the given file system. + * + * @return the directory pathname + */ + @Override + public Path getWorkingDirectory() { + return workingDirectory; + } + + /** + * Set the current working directory for the given file system. All relative + * paths will be resolved relative to it. + * + * @param dir Working directory path. + */ + @Override + public void setWorkingDirectory(final Path dir) { + if (dir == null) { + throw new InvalidPathException("Working directory cannot be set to NULL"); + } + + /** + * Do not validate the scheme and URI of the passsed parameter. When Adls + * runs as additional file system, working directory set has the default + * file system scheme and uri. 
+ * + * Found a problem during PIG execution in + * https://github.com/apache/pig/blob/branch-0 + * .15/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer + * /PigInputFormat.java#L235 + * However similar problem would be present in other application so + * defaulting to build working directory using relative path only. + */ + this.workingDirectory = this.makeAbsolute(dir); + } + + /** + * Return the number of bytes that large input files should be optimally + * be split into to minimize i/o time. + * + * @deprecated use {@link #getDefaultBlockSize(Path)} instead + */ + @Deprecated + public long getDefaultBlockSize() { + return ADL_BLOCK_SIZE; + } + + /** + * Return the number of bytes that large input files should be optimally + * be split into to minimize i/o time. The given path will be used to + * locate the actual filesystem. The full path does not have to exist. + * + * @param f path of file + * @return the default block size for the path's filesystem + */ + public long getDefaultBlockSize(Path f) { + return getDefaultBlockSize(); + } + + /** + * Get the block size. + * @param f the filename + * @return the number of bytes in a block + */ + /** + * @deprecated Use getFileStatus() instead + */ + @Deprecated + public long getBlockSize(Path f) throws IOException { + return ADL_BLOCK_SIZE; + } + + @Override + public BlockLocation[] getFileBlockLocations(final FileStatus status, + final long offset, final long length) throws IOException { + if (status == null) { + return null; + } + + if ((offset < 0) || (length < 0)) { + throw new IllegalArgumentException("Invalid start or len parameter"); + } + + if (status.getLen() < offset) { + return new BlockLocation[0]; + } + + final String[] name = {"localhost"}; + final String[] host = {"localhost"}; + long blockSize = ADL_BLOCK_SIZE; + int numberOfLocations = + (int) (length / blockSize) + ((length % blockSize == 0) ? 0 : 1); + BlockLocation[] locations = new BlockLocation[numberOfLocations]; + for (int i = 0; i < locations.length; i++) { + long currentOffset = offset + (i * blockSize); + long currentLength = Math.min(blockSize, offset + length - currentOffset); + locations[i] = new BlockLocation(name, host, currentOffset, + currentLength); + } + + return locations; + } + + @Override + public BlockLocation[] getFileBlockLocations(final Path p, final long offset, + final long length) throws IOException { + // read ops incremented in getFileStatus + FileStatus fileStatus = getFileStatus(p); + return getFileBlockLocations(fileStatus, offset, length); + } + + /** + * Get replication. + * + * @param src file name + * @return file replication + * @deprecated Use getFileStatus() instead + */ + @Deprecated + public short getReplication(Path src) { + return ADL_REPLICATION_FACTOR; + } + + private Path makeAbsolute(Path path) { + return path.isAbsolute() ? path : new Path(this.workingDirectory, path); + } + + private static String getNonEmptyVal(Configuration conf, String key) { + String value = conf.get(key); + if (StringUtils.isEmpty(value)) { + throw new IllegalArgumentException( + "No value for " + key + " found in conf file."); + } + return value; + } + + /** + * A wrapper of {@link Configuration#getPassword(String)}. It returns + * String instead of char[]. 
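+ * Note that Configuration#getPassword consults any credential providers
+ * configured through hadoop.security.credential.provider.path before
+ * falling back (by default) to the clear-text value in the configuration.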
+ * + * @param conf the configuration + * @param key the property key + * @return the password string + * @throws IOException if the password was not found + */ + private static String getPasswordString(Configuration conf, String key) + throws IOException { + char[] passchars = conf.getPassword(key); + if (passchars == null) { + throw new IOException("Password " + key + " not found"); + } + return new String(passchars); + } + + @VisibleForTesting + public void setUserGroupRepresentationAsUPN(boolean enableUPN) { + oidOrUpn = enableUPN ? UserGroupRepresentation.UPN : + UserGroupRepresentation.OID; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFsInputStream.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFsInputStream.java new file mode 100644 index 0000000..5248cbf --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFsInputStream.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import com.microsoft.azure.datalake.store.ADLFileInputStream; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSInputStream; +import org.apache.hadoop.fs.FileSystem.Statistics; + +import java.io.IOException; + +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_EXPERIMENT_POSITIONAL_READ_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .DEFAULT_READ_AHEAD_BUFFER_SIZE; +import static org.apache.hadoop.fs.adl.AdlConfKeys.READ_AHEAD_BUFFER_SIZE_KEY; + +/** + * Wraps {@link ADLFileInputStream} implementation. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public final class AdlFsInputStream extends FSInputStream { + + private final ADLFileInputStream in; + private final Statistics stat; + private final boolean enablePositionalReadExperiment; + + public AdlFsInputStream(ADLFileInputStream inputStream, Statistics statistics, + Configuration conf) throws IOException { + this.in = inputStream; + this.in.setBufferSize(conf.getInt(READ_AHEAD_BUFFER_SIZE_KEY, + DEFAULT_READ_AHEAD_BUFFER_SIZE)); + enablePositionalReadExperiment = conf + .getBoolean(ADL_EXPERIMENT_POSITIONAL_READ_KEY, + ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT); + stat = statistics; + } + + @Override + public synchronized void seek(long pos) throws IOException { + in.seek(pos); + } + + /** + * Return the current offset from the start of the file. 
+ */ + @Override + public synchronized long getPos() throws IOException { + return in.getPos(); + } + + @Override + public boolean seekToNewSource(long l) throws IOException { + return false; + } + + @Override + public synchronized int read() throws IOException { + int ch = in.read(); + if (stat != null && ch != -1) { + stat.incrementBytesRead(1); + } + return ch; + } + + @Override + public int read(long position, byte[] buffer, int offset, int length) + throws IOException { + int numberOfByteRead = 0; + if (enablePositionalReadExperiment) { + numberOfByteRead = in.read(position, buffer, offset, length); + } else { + numberOfByteRead = super.read(position, buffer, offset, length); + } + + if (stat != null && numberOfByteRead > 0) { + stat.incrementBytesRead(numberOfByteRead); + } + return numberOfByteRead; + } + + @Override + public synchronized int read(byte[] buffer, int offset, int length) + throws IOException { + int numberOfByteRead = in.read(buffer, offset, length); + if (stat != null && numberOfByteRead > 0) { + stat.incrementBytesRead(numberOfByteRead); + } + return numberOfByteRead; + } + + /** + * This method returns the remaining bytes in the stream, rather than the + * expected Java + * interpretation of {@link java.io.InputStream#available()}, which expects + * the + * number of remaining + * bytes in the local buffer. Moreover, it caps the value returned to a + * maximum of Integer.MAX_VALUE. + * These changed behaviors are to ensure compatibility with the + * expectations of HBase WAL reader, + * which depends on available() returning the number of bytes in stream. + * + * Given all other FileSystems in the hadoop ecosystem (especially HDFS) do + * this, it is possible other + * apps other than HBase would also pick up expectation of this behavior + * based on HDFS implementation. + * Therefore keeping this quirky behavior here, to ensure compatibility. + * + * @return remaining bytes in the stream, with maximum of Integer.MAX_VALUE. + * @throws IOException If fails to get the position or file length from SDK. + */ + @Override + public synchronized int available() throws IOException { + return (int) Math.min(in.length() - in.getPos(), Integer.MAX_VALUE); + } + + @Override + public synchronized void close() throws IOException { + in.close(); + } + + @Override + public synchronized long skip(long pos) throws IOException { + return in.skip(pos); + } + +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFsOutputStream.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFsOutputStream.java new file mode 100644 index 0000000..2b89fb0 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFsOutputStream.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import com.microsoft.azure.datalake.store.ADLFileOutputStream; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Syncable; + +import java.io.IOException; +import java.io.OutputStream; + +import static org.apache.hadoop.fs.adl.AdlConfKeys + .DEFAULT_WRITE_AHEAD_BUFFER_SIZE; +import static org.apache.hadoop.fs.adl.AdlConfKeys.WRITE_BUFFER_SIZE_KEY; + +/** + * Wraps {@link com.microsoft.azure.datalake.store.ADLFileOutputStream} + * implementation. + * + * Flush semantics. + * no-op, since some parts of hadoop ecosystem call flush(), expecting it to + * have no perf impact. In hadoop filesystems, flush() itself guarantees no + * durability: that is achieved by calling hflush() or hsync() + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public final class AdlFsOutputStream extends OutputStream implements Syncable { + private final ADLFileOutputStream out; + + public AdlFsOutputStream(ADLFileOutputStream out, Configuration configuration) + throws IOException { + this.out = out; + out.setBufferSize(configuration + .getInt(WRITE_BUFFER_SIZE_KEY, DEFAULT_WRITE_AHEAD_BUFFER_SIZE)); + } + + @Override + public synchronized void write(int b) throws IOException { + out.write(b); + } + + @Override + public synchronized void write(byte[] b, int off, int len) + throws IOException { + out.write(b, off, len); + } + + @Override + public synchronized void close() throws IOException { + out.close(); + } + + public synchronized void sync() throws IOException { + out.flush(); + } + + public synchronized void hflush() throws IOException { + out.flush(); + } + + public synchronized void hsync() throws IOException { + out.flush(); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlPermission.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlPermission.java new file mode 100644 index 0000000..af3342a --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlPermission.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl; + +import org.apache.hadoop.fs.permission.FsPermission; + +/** + * Hadoop shell command -getfacl does not invoke getAclStatus if FsPermission + * from getFileStatus has not set ACL bit to true. By default getAclBit returns + * false. + * + * Provision to make additional call to invoke getAclStatus would be redundant + * when adls is running as additional FS. 
To avoid this redundancy, provided + * configuration to return true/false on getAclBit. + */ +class AdlPermission extends FsPermission { + private final boolean aclBit; + + AdlPermission(boolean aclBitStatus, Short aShort) { + super(aShort); + this.aclBit = aclBitStatus; + } + + /** + * Returns true if "adl.feature.support.acl.bit" configuration is set to + * true. + * + * If configuration is not set then default value is true. + * + * @return If configuration is not set then default value is true. + */ + public boolean getAclBit() { + return aclBit; + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof FsPermission) { + FsPermission that = (FsPermission) obj; + return this.getUserAction() == that.getUserAction() + && this.getGroupAction() == that.getGroupAction() + && this.getOtherAction() == that.getOtherAction() + && this.getStickyBit() == that.getStickyBit(); + } + return false; + } + + @Override + public int hashCode() { + return toShort(); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/SdkTokenProviderAdapter.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/SdkTokenProviderAdapter.java new file mode 100644 index 0000000..7b107ae --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/SdkTokenProviderAdapter.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider; +import com.microsoft.azure.datalake.store.oauth2.AzureADToken; +import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider; + +import java.io.IOException; + +final class SdkTokenProviderAdapter extends AccessTokenProvider { + + private AzureADTokenProvider tokenProvider; + + SdkTokenProviderAdapter(AzureADTokenProvider tp) { + this.tokenProvider = tp; + } + + protected AzureADToken refreshToken() throws IOException { + AzureADToken azureADToken = new AzureADToken(); + azureADToken.accessToken = tokenProvider.getAccessToken(); + azureADToken.expiry = tokenProvider.getExpiryTime(); + return azureADToken; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/TokenProviderType.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/TokenProviderType.java new file mode 100644 index 0000000..9fd4f4f --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/TokenProviderType.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +enum TokenProviderType { + RefreshToken, + ClientCredential, + Custom +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/oauth2/AzureADTokenProvider.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/oauth2/AzureADTokenProvider.java new file mode 100644 index 0000000..a0b3922 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/oauth2/AzureADTokenProvider.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl.oauth2; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; + +import java.io.IOException; +import java.util.Date; + +/** + * Provide an Azure Active Directory supported + * OAuth2 access token to be used to authenticate REST calls against Azure data + * lake file system {@link org.apache.hadoop.fs.adl.AdlFileSystem}. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public abstract class AzureADTokenProvider { + + /** + * Initialize with supported configuration. This method is invoked when the + * {@link org.apache.hadoop.fs.adl.AdlFileSystem#initialize + * (URI, Configuration)} method is invoked. + * + * @param configuration Configuration object + * @throws IOException if instance can not be configured. + */ + public abstract void initialize(Configuration configuration) + throws IOException; + + /** + * Obtain the access token that should be added to https connection's header. + * Will be called depending upon {@link #getExpiryTime()} expiry time is set, + * so implementations should be performant. Implementations are responsible + * for any refreshing of the token. + * + * @return String containing the access token + * @throws IOException if there is an error fetching the token + */ + public abstract String getAccessToken() throws IOException; + + /** + * Obtain expiry time of the token. 
If implementation is performant enough to + * maintain expiry and expect {@link #getAccessToken()} call for every + * connection then safe to return current or past time. + * + * However recommended to use the token expiry time received from Azure Active + * Directory. + * + * @return Date to expire access token retrieved from AAD. + */ + public abstract Date getExpiryTime(); +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/oauth2/package-info.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/oauth2/package-info.java new file mode 100644 index 0000000..1613941 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/oauth2/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/** + * public interface to expose OAuth2 authentication related features. + */ +package org.apache.hadoop.fs.adl.oauth2; \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/package-info.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/package-info.java new file mode 100644 index 0000000..456eebc --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/** + * Supporting classes for metrics instrumentation. 
+ */ +package org.apache.hadoop.fs.adl; \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md new file mode 100644 index 0000000..6d9e173 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md @@ -0,0 +1,265 @@ + + +# Hadoop Azure Data Lake Support + +* [Introduction](#Introduction) +* [Features](#Features) +* [Limitations](#Limitations) +* [Usage](#Usage) + * [Concepts](#Concepts) + * [OAuth2 Support](#OAuth2_Support) + * [Configuring Credentials & FileSystem](#Configuring_Credentials) + * [Using Refresh Token](#Refresh_Token) + * [Using Client Keys](#Client_Credential_Token) + * [Protecting the Credentials with Credential Providers](#Credential_Provider) + * [Enabling ADL Filesystem](#Enabling_ADL) + * [Accessing adl URLs](#Accessing_adl_URLs) + * [User/Group Representation](#OIDtoUPNConfiguration) +* [Testing the hadoop-azure Module](#Testing_the_hadoop-azure_Module) + +## Introduction + +The hadoop-azure-datalake module provides support for integration with +[Azure Data Lake Store]( https://azure.microsoft.com/en-in/documentation/services/data-lake-store/). +The jar file is named azure-datalake-store.jar. + +## Features + +* Read and write data stored in an Azure Data Lake Storage account. +* Reference file system paths using URLs using the `adl` scheme for Secure Webhdfs i.e. SSL + encrypted access. +* Can act as a source of data in a MapReduce job, or a sink. +* Tested on both Linux and Windows. +* Tested for scale. +* API setOwner/setAcl/removeAclEntries/modifyAclEntries accepts UPN or OID + (Object ID) as user and group name. + +## Limitations +Partial or no support for the following operations : + +* Operation on Symbolic Link +* Proxy Users +* File Truncate +* File Checksum +* File replication factor +* Home directory the active user on Hadoop cluster. +* Extended Attributes(XAttrs) Operations +* Snapshot Operations +* Delegation Token Operations +* User and group information returned as ListStatus and GetFileStatus is in form of GUID associated in Azure Active Directory. + +## Usage + +### Concepts +Azure Data Lake Storage access path syntax is + + adl://.azuredatalakestore.net/ + +Get started with azure data lake account with [https://azure.microsoft.com/en-in/documentation/articles/data-lake-store-get-started-portal/](https://azure.microsoft.com/en-in/documentation/articles/data-lake-store-get-started-portal/) + +#### OAuth2 Support +Usage of Azure Data Lake Storage requires OAuth2 bearer token to be present as part of the HTTPS header as per OAuth2 specification. Valid OAuth2 bearer token should be obtained from Azure Active Directory for valid users who have access to Azure Data Lake Storage Account. + +Azure Active Directory (Azure AD) is Microsoft's multi-tenant cloud based directory and identity management service. See [https://azure.microsoft.com/en-in/documentation/articles/active-directory-whatis/](https://azure.microsoft.com/en-in/documentation/articles/active-directory-whatis/) + +Following sections describes on OAuth2 configuration in core-site.xml. + +## Configuring Credentials & FileSystem +Credentials can be configured using either a refresh token (associated with a user) or a client credential (analogous to a service principal). 
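
In addition to the two built-in options described below, this patch also accepts a custom token provider: leave `dfs.adls.oauth2.access.token.provider.type` unset (or set it to `Custom`) and point `dfs.adls.oauth2.access.token.provider` at a subclass of `org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider`. The sketch below is illustrative only; the class name `com.example.adl.StaticTokenProvider` and the `my.adl.static.token` key are hypothetical, and a real provider would refresh its token rather than serve a fixed value.

```
package com.example.adl;

import java.io.IOException;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider;

/**
 * Hypothetical provider that hands back a bearer token obtained out of band,
 * e.g. injected by an external secret-management process.
 */
public class StaticTokenProvider extends AzureADTokenProvider {
  private String token;
  private Date expiry;

  @Override
  public void initialize(Configuration conf) throws IOException {
    // Illustrative only: read a pre-fetched bearer token from configuration.
    token = conf.get("my.adl.static.token");
    if (token == null) {
      throw new IOException("my.adl.static.token is not set");
    }
    // Pretend the token is good for one hour from now.
    expiry = new Date(System.currentTimeMillis() + 60L * 60 * 1000);
  }

  @Override
  public String getAccessToken() throws IOException {
    return token;
  }

  @Override
  public Date getExpiryTime() {
    return expiry;
  }
}
```

The AdlFileSystem in this patch instantiates the configured class reflectively and calls `initialize(conf)` before requesting tokens, so a provider only needs a public no-argument constructor and the three overrides shown above.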
+
+### Using Refresh Token
+
+Add the following properties to your core-site.xml:
+
+    <property>
+        <name>dfs.adls.oauth2.access.token.provider.type</name>
+        <value>RefreshToken</value>
+    </property>
+
+The application must be configured with the client id and the OAuth2 refresh
+token from Azure Active Directory that is associated with that client id. See
+[https://github.com/AzureAD/azure-activedirectory-library-for-java](https://github.com/AzureAD/azure-activedirectory-library-for-java).
+
+**Do not share the client id and refresh token; they must be kept secret.**
+
+    <property>
+        <name>dfs.adls.oauth2.client.id</name>
+        <value></value>
+    </property>
+
+    <property>
+        <name>dfs.adls.oauth2.refresh.token</name>
+        <value></value>
+    </property>
+
+### Using Client Keys
+
+#### Generating the Service Principal
+1. Go to the portal (https://portal.azure.com)
+2. Under "Browse", look for Active Directory and click on it.
+3. Create "Web Application". Remember the name you create here - that is what
+   you will add to your ADL account as an authorized user.
+4. Go through the wizard.
+5. Once the app is created, go to the app configuration and find the section
+   on "keys".
+6. Select a key duration and hit save. Save the generated keys.
+7. Note down the properties you will need to authenticate:
+    - The client ID
+    - The key you just generated above
+    - The token endpoint (select "View endpoints" at the bottom of the page
+      and copy/paste the OAuth 2.0 Token Endpoint value)
+    - Resource: always https://management.core.windows.net/, for all customers
+
+#### Adding the service principal to your ADL Account
+1. Go to the portal again, and open your ADL account.
+2. Select Users under Settings.
+3. Add the name of the Web Application you created above (note that it does
+   not show up in the list, but will be found if you search for the name).
+4. Add the "Owner" role.
+
+#### Configure core-site.xml
+Add the following properties to your core-site.xml:
+
+    <property>
+        <name>dfs.adls.oauth2.refresh.url</name>
+        <value>TOKEN ENDPOINT FROM STEP 7 ABOVE</value>
+    </property>
+
+    <property>
+        <name>dfs.adls.oauth2.client.id</name>
+        <value>CLIENT ID FROM STEP 7 ABOVE</value>
+    </property>
+
+    <property>
+        <name>dfs.adls.oauth2.credential</name>
+        <value>PASSWORD FROM STEP 7 ABOVE</value>
+    </property>
+
+### Protecting the Credentials with Credential Providers
+
+In many Hadoop clusters, the core-site.xml file is world-readable. To protect
+these credentials from prying eyes, it is recommended that you use the
+credential provider framework to securely store them and access them through
+configuration.
+
+All ADLS credential properties can be protected by credential providers.
+For additional reading on the credential provider API, see
+[Credential Provider API](../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html).
+
+#### Provisioning
+
+```
+% hadoop credential create dfs.adls.oauth2.refresh.token -value 123
+    -provider localjceks://file/home/foo/adls.jceks
+% hadoop credential create dfs.adls.oauth2.credential -value 123
+    -provider localjceks://file/home/foo/adls.jceks
+```
+
+#### Configuring core-site.xml or command line property
+
+```
+<property>
+  <name>hadoop.security.credential.provider.path</name>
+  <value>localjceks://file/home/foo/adls.jceks</value>
+  <description>Path to interrogate for protected credentials.</description>
+</property>
+```
+
+#### Running DistCp
+
+```
+% hadoop distcp
+    [-D hadoop.security.credential.provider.path=localjceks://file/home/user/adls.jceks]
+    hdfs://<NameNode hostname>:9001/user/foo/007020615
+    adl://<Account Name>.azuredatalakestore.net/testDir/
+```
+
+NOTE: You may optionally add the provider path property to the distcp command
+line instead of adding a job-specific configuration to a generic core-site.xml.
+The square brackets above illustrate this capability.
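To illustrate how such protected entries are resolved at run time (a sketch for illustration, not part of this patch): Hadoop's `Configuration.getPassword()` consults the configured credential providers before falling back to any clear-text value in the configuration, which is how properties such as `dfs.adls.oauth2.credential` can live in a JCEKS store instead of core-site.xml. The provider path and alias below match the provisioning example above.

```java
import org.apache.hadoop.conf.Configuration;

public class CredentialLookupExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    // Point at the keystore created with "hadoop credential create".
    conf.set("hadoop.security.credential.provider.path",
        "localjceks://file/home/foo/adls.jceks");

    // getPassword() checks the credential providers first and only falls
    // back to the plain configuration value if the alias is not found.
    char[] secret = conf.getPassword("dfs.adls.oauth2.credential");
    System.out.println("credential resolved: " + (secret != null));
  }
}
```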
+
+## Enabling ADL Filesystem
+
+For the ADL FileSystem to take effect, update core-site.xml with:
+
+    <property>
+        <name>fs.adl.impl</name>
+        <value>org.apache.hadoop.fs.adl.AdlFileSystem</value>
+    </property>
+
+    <property>
+        <name>fs.AbstractFileSystem.adl.impl</name>
+        <value>org.apache.hadoop.fs.adl.Adl</value>
+    </property>
+
+### Accessing adl URLs
+
+After credentials are configured in core-site.xml, any Hadoop component may
+reference files in that Azure Data Lake Storage account by using URLs of the
+following format:
+
+    adl://<Account Name>.azuredatalakestore.net/<path to file or directory>
+
+The `adl` scheme identifies a URL on a file system backed by Azure
+Data Lake Storage. `adl` utilizes encrypted HTTPS access for all interaction
+with the Azure Data Lake Storage API.
+
+For example, the following
+[FileSystem Shell](../hadoop-project-dist/hadoop-common/FileSystemShell.html)
+commands demonstrate access to a storage account named `youraccount`.
+
+    > hadoop fs -mkdir adl://youraccount.azuredatalakestore.net/testDir
+
+    > hadoop fs -put testFile adl://youraccount.azuredatalakestore.net/testDir/testFile
+
+    > hadoop fs -cat adl://youraccount.azuredatalakestore.net/testDir/testFile
+    test file content
+
+### User/Group Representation
+The hadoop-azure-datalake module provides support for configuring how
+User/Group information is represented during
+getFileStatus/listStatus/getAclStatus.
+
+Add the following properties to your core-site.xml:
+
+    <property>
+        <name>adl.feature.ownerandgroup.enableupn</name>
+        <value>true</value>
+        <description>
+            When true : User and Group in FileStatus/AclStatus response is
+            represented as a user-friendly name as per the Azure AD profile.
+
+            When false (default) : User and Group in FileStatus/AclStatus
+            response is represented by the unique identifier from the Azure AD
+            profile (Object ID as GUID).
+
+            For optimal performance, the default value is recommended.
+        </description>
+    </property>
+
+## Testing the hadoop-azure-datalake Module
+The hadoop-azure-datalake module includes a full suite of unit tests. Most of
+the tests will run without additional configuration by running `mvn test`.
+This includes tests against mocked storage, which is an in-memory emulation of
+Azure Data Lake Storage.
+
+A selection of tests can run against Azure Data Lake Storage. To run these
+tests, please create `src/test/resources/auth-keys.xml` with the Adl account
+information mentioned in the above sections and the following properties:
+
+    <property>
+        <name>dfs.adl.test.contract.enable</name>
+        <value>true</value>
+    </property>
+
+    <property>
+        <name>test.fs.adl.name</name>
+        <value>adl://youraccount.azuredatalakestore.net</value>
+    </property>
diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/AdlMockWebServer.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/AdlMockWebServer.java
new file mode 100644
index 0000000..55c8f81
--- /dev/null
+++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/AdlMockWebServer.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.fs.adl; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider; +import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .AZURE_AD_TOKEN_PROVIDER_CLASS_KEY; + +import com.squareup.okhttp.mockwebserver.MockWebServer; + +import org.junit.After; +import org.junit.Before; + +/** + * Mock server to simulate Adls backend calls. This infrastructure is expandable + * to override expected server response based on the derived test functionality. + * Common functionality to generate token information before request is send to + * adls backend is also managed within AdlMockWebServer implementation using + * {@link org.apache.hadoop.fs.adl.common.CustomMockTokenProvider}. + */ +public class AdlMockWebServer { + // Create a MockWebServer. These are lean enough that you can create a new + // instance for every unit test. + private MockWebServer server = null; + private TestableAdlFileSystem fs = null; + private int port = 0; + private Configuration conf = new Configuration(); + + public MockWebServer getMockServer() { + return server; + } + + public TestableAdlFileSystem getMockAdlFileSystem() { + return fs; + } + + public int getPort() { + return port; + } + + public Configuration getConf() { + return conf; + } + + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Before + public void preTestSetup() throws IOException, URISyntaxException { + server = new MockWebServer(); + + // Start the server. + server.start(); + + // Ask the server for its URL. You'll need this to make HTTP requests. + URL baseUrl = server.getUrl(""); + port = baseUrl.getPort(); + + // Exercise your application code, which should make those HTTP requests. + // Responses are returned in the same order that they are enqueued. + fs = new TestableAdlFileSystem(); + + conf.setClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, + CustomMockTokenProvider.class, AzureADTokenProvider.class); + + URI uri = new URI("adl://localhost:" + port); + fs.initialize(uri, conf); + } + + @After + public void postTestSetup() throws IOException { + fs.close(); + server.shutdown(); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestACLFeatures.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestACLFeatures.java new file mode 100644 index 0000000..b420daa --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestACLFeatures.java @@ -0,0 +1,262 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.hadoop.fs.adl; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclEntryScope; +import org.apache.hadoop.fs.permission.AclEntryType; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.AccessControlException; + +import com.squareup.okhttp.mockwebserver.MockResponse; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Stub adl server and test acl data conversion within SDK and Hadoop adl + * client. + */ +public class TestACLFeatures extends AdlMockWebServer { + + @Test(expected=AccessControlException.class) + public void testModifyAclEntries() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + List entries = new ArrayList(); + AclEntry.Builder aclEntryBuilder = new AclEntry.Builder(); + aclEntryBuilder.setName("hadoop"); + aclEntryBuilder.setType(AclEntryType.USER); + aclEntryBuilder.setPermission(FsAction.ALL); + aclEntryBuilder.setScope(AclEntryScope.ACCESS); + entries.add(aclEntryBuilder.build()); + + aclEntryBuilder.setName("hdfs"); + aclEntryBuilder.setType(AclEntryType.GROUP); + aclEntryBuilder.setPermission(FsAction.READ_WRITE); + aclEntryBuilder.setScope(AclEntryScope.DEFAULT); + entries.add(aclEntryBuilder.build()); + + getMockAdlFileSystem().modifyAclEntries(new Path("/test1/test2"), entries); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem() + .modifyAclEntries(new Path("/test1/test2"), entries); + } + + @Test(expected=AccessControlException.class) + public void testRemoveAclEntriesWithOnlyUsers() + throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + List entries = new ArrayList(); + AclEntry.Builder aclEntryBuilder = new AclEntry.Builder(); + aclEntryBuilder.setName("hadoop"); + aclEntryBuilder.setType(AclEntryType.USER); + entries.add(aclEntryBuilder.build()); + + getMockAdlFileSystem().removeAclEntries(new Path("/test1/test2"), entries); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem() + .removeAclEntries(new Path("/test1/test2"), entries); + } + + @Test(expected=AccessControlException.class) + public void testRemoveAclEntries() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + List entries = new ArrayList(); + AclEntry.Builder aclEntryBuilder = new AclEntry.Builder(); + aclEntryBuilder.setName("hadoop"); + aclEntryBuilder.setType(AclEntryType.USER); + aclEntryBuilder.setPermission(FsAction.ALL); + aclEntryBuilder.setScope(AclEntryScope.ACCESS); + entries.add(aclEntryBuilder.build()); + + aclEntryBuilder.setName("hdfs"); + aclEntryBuilder.setType(AclEntryType.GROUP); + aclEntryBuilder.setPermission(FsAction.READ_WRITE); + aclEntryBuilder.setScope(AclEntryScope.DEFAULT); + entries.add(aclEntryBuilder.build()); + + getMockAdlFileSystem().removeAclEntries(new Path("/test1/test2"), entries); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + 
.setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem() + .removeAclEntries(new Path("/test1/test2"), entries); + } + + @Test(expected=AccessControlException.class) + public void testRemoveDefaultAclEntries() + throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().removeDefaultAcl(new Path("/test1/test2")); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem().removeDefaultAcl(new Path("/test1/test2")); + } + + @Test(expected=AccessControlException.class) + public void testRemoveAcl() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().removeAcl(new Path("/test1/test2")); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem().removeAcl(new Path("/test1/test2")); + } + + @Test(expected=AccessControlException.class) + public void testSetAcl() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + List entries = new ArrayList(); + AclEntry.Builder aclEntryBuilder = new AclEntry.Builder(); + aclEntryBuilder.setName("hadoop"); + aclEntryBuilder.setType(AclEntryType.USER); + aclEntryBuilder.setPermission(FsAction.ALL); + aclEntryBuilder.setScope(AclEntryScope.ACCESS); + entries.add(aclEntryBuilder.build()); + + aclEntryBuilder.setName("hdfs"); + aclEntryBuilder.setType(AclEntryType.GROUP); + aclEntryBuilder.setPermission(FsAction.READ_WRITE); + aclEntryBuilder.setScope(AclEntryScope.DEFAULT); + entries.add(aclEntryBuilder.build()); + + getMockAdlFileSystem().setAcl(new Path("/test1/test2"), entries); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem().setAcl(new Path("/test1/test2"), entries); + } + + @Test(expected=AccessControlException.class) + public void testCheckAccess() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.ALL); + + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.EXECUTE); + + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.READ); + + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem() + .access(new Path("/test1/test2"), FsAction.READ_EXECUTE); + + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem() + .access(new Path("/test1/test2"), FsAction.READ_WRITE); + + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.NONE); + + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.WRITE); + + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem() + .access(new Path("/test1/test2"), FsAction.WRITE_EXECUTE); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem() + .access(new 
Path("/test1/test2"), FsAction.WRITE_EXECUTE); + } + + @Test(expected=AccessControlException.class) + public void testSetPermission() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem() + .setPermission(new Path("/test1/test2"), FsPermission.getDefault()); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem() + .setPermission(new Path("/test1/test2"), FsPermission.getDefault()); + } + + @Test(expected=AccessControlException.class) + public void testSetOwner() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().setOwner(new Path("/test1/test2"), "hadoop", "hdfs"); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem() + .setOwner(new Path("/test1/test2"), "hadoop", "hdfs"); + } + + @Test + public void getAclStatusAsExpected() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getGetAclStatusJSONResponse())); + AclStatus aclStatus = getMockAdlFileSystem() + .getAclStatus(new Path("/test1/test2")); + Assert.assertEquals(aclStatus.getGroup(), "supergroup"); + Assert.assertEquals(aclStatus.getOwner(), "hadoop"); + Assert.assertEquals((Short) aclStatus.getPermission().toShort(), + Short.valueOf("775", 8)); + + for (AclEntry entry : aclStatus.getEntries()) { + if (!(entry.toString().equalsIgnoreCase("user:carla:rw-") || entry + .toString().equalsIgnoreCase("group::r-x"))) { + Assert.fail("Unexpected entry : " + entry.toString()); + } + } + } + + @Test(expected=FileNotFoundException.class) + public void getAclStatusNotExists() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(404) + .setBody(TestADLResponseData.getFileNotFoundException())); + + getMockAdlFileSystem().getAclStatus(new Path("/test1/test2")); + } + + @Test(expected=AccessControlException.class) + public void testAclStatusDenied() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem().getAclStatus(new Path("/test1/test2")); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestADLResponseData.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestADLResponseData.java new file mode 100644 index 0000000..788242e --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestADLResponseData.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl; + +import org.apache.hadoop.fs.FileStatus; + +import java.util.Random; + +/** + * Mock up response data returned from Adl storage account. + */ +public final class TestADLResponseData { + + private TestADLResponseData() { + + } + + public static String getGetFileStatusJSONResponse(FileStatus status) { + return "{\"FileStatus\":{\"length\":" + status.getLen() + "," + + "\"pathSuffix\":\"\",\"type\":\"" + (status.isDirectory() ? + "DIRECTORY" : + "FILE") + "\"" + + ",\"blockSize\":" + status.getBlockSize() + ",\"accessTime\":" + + status.getAccessTime() + ",\"modificationTime\":" + status + .getModificationTime() + "" + + ",\"replication\":" + status.getReplication() + ",\"permission\":\"" + + status.getPermission() + "\",\"owner\":\"" + status.getOwner() + + "\",\"group\":\"" + status.getGroup() + "\"}}"; + } + + public static String getGetFileStatusJSONResponse() { + return getGetFileStatusJSONResponse(4194304); + } + + public static String getGetAclStatusJSONResponse() { + return "{\n" + " \"AclStatus\": {\n" + " \"entries\": [\n" + + " \"user:carla:rw-\", \n" + " \"group::r-x\"\n" + + " ], \n" + " \"group\": \"supergroup\", \n" + + " \"owner\": \"hadoop\", \n" + + " \"permission\":\"775\",\n" + " \"stickyBit\": false\n" + + " }\n" + "}"; + } + + public static String getGetFileStatusJSONResponse(long length) { + return "{\"FileStatus\":{\"length\":" + length + "," + + "\"pathSuffix\":\"\",\"type\":\"FILE\",\"blockSize\":268435456," + + "\"accessTime\":1452103827023,\"modificationTime\":1452103827023," + + "\"replication\":0,\"permission\":\"777\"," + + "\"owner\":\"NotSupportYet\",\"group\":\"NotSupportYet\"}}"; + } + + public static String getGetFileStatusJSONResponse(boolean aclBit) { + return "{\"FileStatus\":{\"length\":1024," + + "\"pathSuffix\":\"\",\"type\":\"FILE\",\"blockSize\":268435456," + + "\"accessTime\":1452103827023,\"modificationTime\":1452103827023," + + "\"replication\":0,\"permission\":\"777\"," + + "\"owner\":\"NotSupportYet\",\"group\":\"NotSupportYet\",\"aclBit\":\"" + + aclBit + "\"}}"; + } + + public static String getListFileStatusJSONResponse(int dirSize) { + String list = ""; + for (int i = 0; i < dirSize; ++i) { + list += "{\"length\":1024,\"pathSuffix\":\"" + java.util.UUID.randomUUID() + + "\",\"type\":\"FILE\",\"blockSize\":268435456," + + "\"accessTime\":1452103878833," + + "\"modificationTime\":1452103879190,\"replication\":0," + + "\"permission\":\"777\",\"owner\":\"NotSupportYet\"," + + "\"group\":\"NotSupportYet\"},"; + } + + list = list.substring(0, list.length() - 1); + return "{\"FileStatuses\":{\"FileStatus\":[" + list + "]}}"; + } + + public static String getListFileStatusJSONResponse(boolean aclBit) { + return "{\"FileStatuses\":{\"FileStatus\":[{\"length\":0,\"pathSuffix\":\"" + + java.util.UUID.randomUUID() + + "\",\"type\":\"DIRECTORY\",\"blockSize\":0," + + "\"accessTime\":1481184513488," + + "\"modificationTime\":1481184513488,\"replication\":0," + + "\"permission\":\"770\"," + + "\"owner\":\"4b27fe1a-d9ab-4a04-ad7a-4bba72cd9e6c\"," + + 
"\"group\":\"4b27fe1a-d9ab-4a04-ad7a-4bba72cd9e6c\",\"aclBit\":\"" + + aclBit + "\"}]}}"; + } + + public static String getJSONResponse(boolean status) { + return "{\"boolean\":" + status + "}"; + } + + public static String getErrorIllegalArgumentExceptionJSONResponse() { + return "{\n" + + " \"RemoteException\":\n" + + " {\n" + + " \"exception\" : \"IllegalArgumentException\",\n" + + " \"javaClassName\": \"java.lang.IllegalArgumentException\",\n" + + " \"message\" : \"Invalid\"" + + " }\n" + + "}"; + } + + public static String getErrorBadOffsetExceptionJSONResponse() { + return "{\n" + + " \"RemoteException\":\n" + + " {\n" + + " \"exception\" : \"BadOffsetException\",\n" + + " \"javaClassName\": \"org.apache.hadoop.fs.adl" + + ".BadOffsetException\",\n" + + " \"message\" : \"Invalid\"" + + " }\n" + + "}"; + } + + public static String getErrorInternalServerExceptionJSONResponse() { + return "{\n" + + " \"RemoteException\":\n" + + " {\n" + + " \"exception\" : \"RuntimeException\",\n" + + " \"javaClassName\": \"java.lang.RuntimeException\",\n" + + " \"message\" : \"Internal Server Error\"" + + " }\n" + + "}"; + } + + public static String getAccessControlException() { + return "{\n" + " \"RemoteException\":\n" + " {\n" + + " \"exception\" : \"AccessControlException\",\n" + + " \"javaClassName\": \"org.apache.hadoop.security" + + ".AccessControlException\",\n" + + " \"message\" : \"Permission denied: ...\"\n" + " }\n" + "}"; + } + + public static String getFileNotFoundException() { + return "{\n" + " \"RemoteException\":\n" + " {\n" + + " \"exception\" : \"FileNotFoundException\",\n" + + " \"javaClassName\": \"java.io.FileNotFoundException\",\n" + + " \"message\" : \"File does not exist\"\n" + " }\n" + "}"; + } + + public static byte[] getRandomByteArrayData() { + return getRandomByteArrayData(4 * 1024 * 1024); + } + + public static byte[] getRandomByteArrayData(int size) { + byte[] b = new byte[size]; + Random rand = new Random(); + rand.nextBytes(b); + return b; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAdlRead.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAdlRead.java new file mode 100644 index 0000000..172663c --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAdlRead.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.adl; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.adl.common.Parallelized; +import org.apache.hadoop.fs.adl.common.TestDataForRead; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.ByteArrayInputStream; +import java.io.EOFException; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Random; + +import static org.apache.hadoop.fs.adl.AdlConfKeys.READ_AHEAD_BUFFER_SIZE_KEY; + +/** + * This class is responsible for stress positional reads vs number of network + * calls required by to fetch the amount of data. Test does ensure the data + * integrity and order of the data is maintained. + */ +@RunWith(Parallelized.class) +public class TestAdlRead extends AdlMockWebServer { + + private TestDataForRead testData; + + public TestAdlRead(TestDataForRead testData) { + Configuration configuration = new Configuration(); + configuration.setInt(READ_AHEAD_BUFFER_SIZE_KEY, 4 * 1024); + setConf(configuration); + this.testData = testData; + } + + @Parameterized.Parameters(name = "{index}") + public static Collection testDataForReadOperation() { + return Arrays.asList(new Object[][] { + + //-------------------------- + // Test Data + //-------------------------- + {new TestDataForRead("Hello World".getBytes(), 2, 1000, true)}, + {new TestDataForRead( + ("the problem you appear to be wrestling with is that this doesn't " + + "display very well. ").getBytes(), 2, 1000, true)}, + {new TestDataForRead(("您的數據是寶貴的資產,以您的組織,並有當前和未來價值。由於這個原因," + + "所有的數據應存儲以供將來分析。今天,這往往是不這樣做," + "因為傳統的分析基礎架構的限制," + + "像模式的預定義,存儲大數據集和不同的數據筒倉的傳播的成本。" + + "為了應對這一挑戰,數據湖面概念被引入作為一個企業級存儲庫來存儲所有" + + "類型的在一個地方收集到的數據。對於運作和探索性分析的目的,所有類型的" + "數據可以定義需求或模式之前被存儲在數據湖。") + .getBytes(), 2, 1000, true)}, {new TestDataForRead( + TestADLResponseData.getRandomByteArrayData(4 * 1024), 2, 10, true)}, + {new TestDataForRead(TestADLResponseData.getRandomByteArrayData(100), 2, + 1000, true)}, {new TestDataForRead( + TestADLResponseData.getRandomByteArrayData(1 * 1024), 2, 50, true)}, + {new TestDataForRead( + TestADLResponseData.getRandomByteArrayData(8 * 1024), 3, 10, + false)}, {new TestDataForRead( + TestADLResponseData.getRandomByteArrayData(16 * 1024), 5, 10, false)}, + {new TestDataForRead( + TestADLResponseData.getRandomByteArrayData(32 * 1024), 9, 10, + false)}, {new TestDataForRead( + TestADLResponseData.getRandomByteArrayData(64 * 1024), 17, 10, + false)}}); + } + + @Test + public void testEntireBytes() throws IOException, InterruptedException { + getMockServer().setDispatcher(testData.getDispatcher()); + FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test")); + byte[] expectedData = new byte[testData.getActualData().length]; + int n = 0; + int len = expectedData.length; + int off = 0; + while (n < len) { + int count = in.read(expectedData, off + n, len - n); + if (count < 0) { + throw new EOFException(); + } + n += count; + } + + Assert.assertEquals(expectedData.length, testData.getActualData().length); + Assert.assertArrayEquals(expectedData, testData.getActualData()); + in.close(); + if (testData.isCheckOfNoOfCalls()) { + Assert.assertEquals(testData.getExpectedNoNetworkCall(), + getMockServer().getRequestCount()); + } + } + + @Test + public void testSeekOperation() throws IOException, InterruptedException { + 
getMockServer().setDispatcher(testData.getDispatcher()); + FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test")); + Random random = new Random(); + for (int i = 0; i < 1000; ++i) { + int position = random.nextInt(testData.getActualData().length); + in.seek(position); + Assert.assertEquals(position, in.getPos()); + Assert.assertEquals(testData.getActualData()[position] & 0xFF, in.read()); + } + in.close(); + if (testData.isCheckOfNoOfCalls()) { + Assert.assertEquals(testData.getExpectedNoNetworkCall(), + getMockServer().getRequestCount()); + } + } + + @Test + public void testReadServerCalls() throws IOException, InterruptedException { + getMockServer().setDispatcher(testData.getDispatcher()); + FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test")); + byte[] expectedData = new byte[testData.getActualData().length]; + in.readFully(expectedData); + Assert.assertArrayEquals(expectedData, testData.getActualData()); + Assert.assertEquals(testData.getExpectedNoNetworkCall(), + getMockServer().getRequestCount()); + in.close(); + } + + @Test + public void testReadFully() throws IOException, InterruptedException { + getMockServer().setDispatcher(testData.getDispatcher()); + FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test")); + byte[] expectedData = new byte[testData.getActualData().length]; + in.readFully(expectedData); + Assert.assertArrayEquals(expectedData, testData.getActualData()); + + in.readFully(0, expectedData); + Assert.assertArrayEquals(expectedData, testData.getActualData()); + + in.readFully(0, expectedData, 0, expectedData.length); + Assert.assertArrayEquals(expectedData, testData.getActualData()); + in.close(); + } + + @Test + public void testRandomPositionalReadUsingReadFully() + throws IOException, InterruptedException { + getMockServer().setDispatcher(testData.getDispatcher()); + FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test")); + ByteArrayInputStream actualData = new ByteArrayInputStream( + testData.getActualData()); + Random random = new Random(); + for (int i = 0; i < testData.getIntensityOfTest(); ++i) { + int offset = random.nextInt(testData.getActualData().length); + int length = testData.getActualData().length - offset; + byte[] expectedData = new byte[length]; + byte[] actualDataSubset = new byte[length]; + actualData.reset(); + actualData.skip(offset); + actualData.read(actualDataSubset, 0, length); + + in.readFully(offset, expectedData, 0, length); + Assert.assertArrayEquals(expectedData, actualDataSubset); + } + + for (int i = 0; i < testData.getIntensityOfTest(); ++i) { + int offset = random.nextInt(testData.getActualData().length); + int length = random.nextInt(testData.getActualData().length - offset); + byte[] expectedData = new byte[length]; + byte[] actualDataSubset = new byte[length]; + actualData.reset(); + actualData.skip(offset); + actualData.read(actualDataSubset, 0, length); + + in.readFully(offset, expectedData, 0, length); + Assert.assertArrayEquals(expectedData, actualDataSubset); + } + + in.close(); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java new file mode 100644 index 0000000..70f2a7f --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java @@ -0,0 +1,285 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import org.apache.commons.lang.builder.EqualsBuilder; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider; +import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider; + +import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider; +import com.microsoft.azure.datalake.store.oauth2.ClientCredsTokenProvider; +import com.microsoft.azure.datalake.store.oauth2.RefreshTokenBasedTokenProvider; + +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_ID_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_SECRET_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_TOKEN_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_URL_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .AZURE_AD_TOKEN_PROVIDER_CLASS_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .AZURE_AD_TOKEN_PROVIDER_TYPE_KEY; +import static org.apache.hadoop.fs.adl.TokenProviderType.*; +import static org.junit.Assert.assertEquals; + +import org.apache.hadoop.security.ProviderUtils; +import org.apache.hadoop.security.alias.CredentialProvider; +import org.apache.hadoop.security.alias.CredentialProviderFactory; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +/** + * Test appropriate token provider is loaded as per configuration. 
+ */ +public class TestAzureADTokenProvider { + + private static final String CLIENT_ID = "MY_CLIENT_ID"; + private static final String REFRESH_TOKEN = "MY_REFRESH_TOKEN"; + private static final String CLIENT_SECRET = "MY_CLIENT_SECRET"; + private static final String REFRESH_URL = "http://localhost:8080/refresh"; + + @Rule + public final TemporaryFolder tempDir = new TemporaryFolder(); + + @Test + public void testRefreshTokenProvider() + throws URISyntaxException, IOException { + Configuration conf = new Configuration(); + conf.set(AZURE_AD_CLIENT_ID_KEY, "MY_CLIENTID"); + conf.set(AZURE_AD_REFRESH_TOKEN_KEY, "XYZ"); + conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, RefreshToken); + conf.set(AZURE_AD_REFRESH_URL_KEY, "http://localhost:8080/refresh"); + + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + fileSystem.initialize(uri, conf); + AccessTokenProvider tokenProvider = fileSystem.getTokenProvider(); + Assert.assertTrue(tokenProvider instanceof RefreshTokenBasedTokenProvider); + } + + @Test + public void testClientCredTokenProvider() + throws IOException, URISyntaxException { + Configuration conf = new Configuration(); + conf.set(AZURE_AD_CLIENT_ID_KEY, "MY_CLIENTID"); + conf.set(AZURE_AD_CLIENT_SECRET_KEY, "XYZ"); + conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, ClientCredential); + conf.set(AZURE_AD_REFRESH_URL_KEY, "http://localhost:8080/refresh"); + + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + fileSystem.initialize(uri, conf); + AccessTokenProvider tokenProvider = fileSystem.getTokenProvider(); + Assert.assertTrue(tokenProvider instanceof ClientCredsTokenProvider); + } + + @Test + public void testCustomCredTokenProvider() + throws URISyntaxException, IOException { + Configuration conf = new Configuration(); + conf.setClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, + CustomMockTokenProvider.class, AzureADTokenProvider.class); + + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + fileSystem.initialize(uri, conf); + AccessTokenProvider tokenProvider = fileSystem.getTokenProvider(); + Assert.assertTrue(tokenProvider instanceof SdkTokenProviderAdapter); + } + + @Test + public void testInvalidProviderConfigurationForType() + throws URISyntaxException, IOException { + Configuration conf = new Configuration(); + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + try { + fileSystem.initialize(uri, conf); + Assert.fail("Initialization should have failed due no token provider " + + "configuration"); + } catch (IllegalArgumentException e) { + Assert.assertTrue( + e.getMessage().contains("dfs.adls.oauth2.access.token.provider")); + } + conf.setClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, + CustomMockTokenProvider.class, AzureADTokenProvider.class); + fileSystem.initialize(uri, conf); + } + + @Test + public void testInvalidProviderConfigurationForClassPath() + throws URISyntaxException, IOException { + Configuration conf = new Configuration(); + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + conf.set(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, + "wrong.classpath.CustomMockTokenProvider"); + try { + fileSystem.initialize(uri, conf); + Assert.fail("Initialization should have failed due invalid provider " + + "configuration"); + } catch (RuntimeException e) { + Assert.assertTrue( + e.getMessage().contains("wrong.classpath.CustomMockTokenProvider")); + } + } + + private CredentialProvider 
createTempCredProvider(Configuration conf) + throws URISyntaxException, IOException { + final File file = tempDir.newFile("test.jks"); + final URI jks = ProviderUtils.nestURIForLocalJavaKeyStoreProvider( + file.toURI()); + conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, + jks.toString()); + return CredentialProviderFactory.getProviders(conf).get(0); + } + + @Test + public void testRefreshTokenWithCredentialProvider() + throws IOException, URISyntaxException { + Configuration conf = new Configuration(); + conf.set(AZURE_AD_CLIENT_ID_KEY, "DUMMY"); + conf.set(AZURE_AD_REFRESH_TOKEN_KEY, "DUMMY"); + conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, RefreshToken); + + CredentialProvider provider = createTempCredProvider(conf); + provider.createCredentialEntry(AZURE_AD_CLIENT_ID_KEY, + CLIENT_ID.toCharArray()); + provider.createCredentialEntry(AZURE_AD_REFRESH_TOKEN_KEY, + REFRESH_TOKEN.toCharArray()); + provider.flush(); + + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + fileSystem.initialize(uri, conf); + RefreshTokenBasedTokenProvider expected = + new RefreshTokenBasedTokenProvider(CLIENT_ID, REFRESH_TOKEN); + Assert.assertTrue(EqualsBuilder.reflectionEquals(expected, + fileSystem.getTokenProvider())); + } + + @Test + public void testRefreshTokenWithCredentialProviderFallback() + throws IOException, URISyntaxException { + Configuration conf = new Configuration(); + conf.set(AZURE_AD_CLIENT_ID_KEY, CLIENT_ID); + conf.set(AZURE_AD_REFRESH_TOKEN_KEY, REFRESH_TOKEN); + conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, RefreshToken); + + createTempCredProvider(conf); + + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + fileSystem.initialize(uri, conf); + RefreshTokenBasedTokenProvider expected = + new RefreshTokenBasedTokenProvider(CLIENT_ID, REFRESH_TOKEN); + Assert.assertTrue(EqualsBuilder.reflectionEquals(expected, + fileSystem.getTokenProvider())); + } + + @Test + public void testClientCredWithCredentialProvider() + throws IOException, URISyntaxException { + Configuration conf = new Configuration(); + conf.set(AZURE_AD_CLIENT_ID_KEY, "DUMMY"); + conf.set(AZURE_AD_CLIENT_SECRET_KEY, "DUMMY"); + conf.set(AZURE_AD_REFRESH_URL_KEY, "DUMMY"); + conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, ClientCredential); + + CredentialProvider provider = createTempCredProvider(conf); + provider.createCredentialEntry(AZURE_AD_CLIENT_ID_KEY, + CLIENT_ID.toCharArray()); + provider.createCredentialEntry(AZURE_AD_CLIENT_SECRET_KEY, + CLIENT_SECRET.toCharArray()); + provider.createCredentialEntry(AZURE_AD_REFRESH_URL_KEY, + REFRESH_URL.toCharArray()); + provider.flush(); + + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + fileSystem.initialize(uri, conf); + ClientCredsTokenProvider expected = new ClientCredsTokenProvider( + REFRESH_URL, CLIENT_ID, CLIENT_SECRET); + Assert.assertTrue(EqualsBuilder.reflectionEquals(expected, + fileSystem.getTokenProvider())); + } + + @Test + public void testClientCredWithCredentialProviderFallback() + throws IOException, URISyntaxException { + Configuration conf = new Configuration(); + conf.set(AZURE_AD_CLIENT_ID_KEY, CLIENT_ID); + conf.set(AZURE_AD_CLIENT_SECRET_KEY, CLIENT_SECRET); + conf.set(AZURE_AD_REFRESH_URL_KEY, REFRESH_URL); + conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, ClientCredential); + + createTempCredProvider(conf); + + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + 
fileSystem.initialize(uri, conf); + ClientCredsTokenProvider expected = new ClientCredsTokenProvider( + REFRESH_URL, CLIENT_ID, CLIENT_SECRET); + Assert.assertTrue(EqualsBuilder.reflectionEquals(expected, + fileSystem.getTokenProvider())); + } + + @Test + public void testCredentialProviderPathExclusions() throws Exception { + String providerPath = + "user:///,jceks://adl/user/hrt_qa/sqoopdbpasswd.jceks," + + "jceks://hdfs@nn1.example.com/my/path/test.jceks"; + Configuration config = new Configuration(); + config.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, + providerPath); + String newPath = + "user:///,jceks://hdfs@nn1.example.com/my/path/test.jceks"; + + excludeAndTestExpectations(config, newPath); + } + + @Test + public void testExcludeAllProviderTypesFromConfig() throws Exception { + String providerPath = + "jceks://adl/tmp/test.jceks," + + "jceks://adl@/my/path/test.jceks"; + Configuration config = new Configuration(); + config.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, + providerPath); + String newPath = null; + + excludeAndTestExpectations(config, newPath); + } + + void excludeAndTestExpectations(Configuration config, String newPath) + throws Exception { + Configuration conf = ProviderUtils.excludeIncompatibleCredentialProviders( + config, AdlFileSystem.class); + String effectivePath = conf.get( + CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, null); + assertEquals(newPath, effectivePath); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestConcurrentDataReadOperations.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestConcurrentDataReadOperations.java new file mode 100644 index 0000000..b790562 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestConcurrentDataReadOperations.java @@ -0,0 +1,299 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.adl; + +import com.squareup.okhttp.mockwebserver.Dispatcher; +import com.squareup.okhttp.mockwebserver.MockResponse; +import com.squareup.okhttp.mockwebserver.RecordedRequest; +import okio.Buffer; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.Path; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Random; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * This class is responsible for testing multiple threads trying to access same + * or multiple files from the offset. + */ +@RunWith(Parameterized.class) +public class TestConcurrentDataReadOperations extends AdlMockWebServer { + private static final Logger LOG = LoggerFactory + .getLogger(TestConcurrentDataReadOperations.class); + private static final Object LOCK = new Object(); + private static FSDataInputStream commonHandle = null; + private int concurrencyLevel; + + public TestConcurrentDataReadOperations(int concurrencyLevel) { + this.concurrencyLevel = concurrencyLevel; + } + + @Parameterized.Parameters(name = "{index}") + public static Collection testDataNumberOfConcurrentRun() { + return Arrays.asList(new Object[][] {{1}, {2}, {3}, {4}, {5}}); + } + + public static byte[] getRandomByteArrayData(int size) { + byte[] b = new byte[size]; + Random rand = new Random(); + rand.nextBytes(b); + return b; + } + + private void setDispatcher(final ArrayList testData) { + getMockServer().setDispatcher(new Dispatcher() { + @Override + public MockResponse dispatch(RecordedRequest recordedRequest) + throws InterruptedException { + CreateTestData currentRequest = null; + for (CreateTestData local : testData) { + if (recordedRequest.getPath().contains(local.path.toString())) { + currentRequest = local; + break; + } + } + + if (currentRequest == null) { + new MockResponse().setBody("Request data not found") + .setResponseCode(501); + } + + if (recordedRequest.getRequestLine().contains("op=GETFILESTATUS")) { + return new MockResponse().setResponseCode(200).setBody( + TestADLResponseData + .getGetFileStatusJSONResponse(currentRequest.data.length)); + } + + if (recordedRequest.getRequestLine().contains("op=OPEN")) { + String request = recordedRequest.getRequestLine(); + int offset = 0; + int byteCount = 0; + + Pattern pattern = Pattern.compile("offset=([0-9]+)"); + Matcher matcher = pattern.matcher(request); + if (matcher.find()) { + LOG.debug(matcher.group(1)); + offset = Integer.parseInt(matcher.group(1)); + } + + pattern = Pattern.compile("length=([0-9]+)"); + matcher = pattern.matcher(request); + if (matcher.find()) { + LOG.debug(matcher.group(1)); + byteCount = Integer.parseInt(matcher.group(1)); + } + + Buffer buf = new Buffer(); + buf.write(currentRequest.data, offset, + Math.min(currentRequest.data.length - offset, byteCount)); + return new MockResponse().setResponseCode(200) + .setChunkedBody(buf, 4 * 1024 * 1024); + } + + return new 
MockResponse().setBody("NOT SUPPORTED").setResponseCode(501); + } + }); + } + + @Before + public void resetHandle() { + commonHandle = null; + } + + @Test + public void testParallelReadOnDifferentStreams() + throws IOException, InterruptedException, ExecutionException { + + ArrayList createTestData = new ArrayList(); + + Random random = new Random(); + + for (int i = 0; i < concurrencyLevel; i++) { + CreateTestData testData = new CreateTestData(); + testData + .set(new Path("/test/concurrentRead/" + UUID.randomUUID().toString()), + getRandomByteArrayData(random.nextInt(1 * 1024 * 1024))); + createTestData.add(testData); + } + + setDispatcher(createTestData); + + ArrayList readTestData = new ArrayList(); + for (CreateTestData local : createTestData) { + ReadTestData localReadData = new ReadTestData(); + localReadData.set(local.path, local.data, 0); + readTestData.add(localReadData); + } + + runReadTest(readTestData, false); + } + + @Test + public void testParallelReadOnSameStreams() + throws IOException, InterruptedException, ExecutionException { + ArrayList createTestData = new ArrayList(); + + Random random = new Random(); + + for (int i = 0; i < 1; i++) { + CreateTestData testData = new CreateTestData(); + testData + .set(new Path("/test/concurrentRead/" + UUID.randomUUID().toString()), + getRandomByteArrayData(1024 * 1024)); + createTestData.add(testData); + } + + setDispatcher(createTestData); + + ArrayList readTestData = new ArrayList(); + ByteArrayInputStream buffered = new ByteArrayInputStream( + createTestData.get(0).data); + + ReadTestData readInitially = new ReadTestData(); + byte[] initialData = new byte[1024 * 1024]; + buffered.read(initialData); + + readInitially.set(createTestData.get(0).path, initialData, 0); + readTestData.add(readInitially); + runReadTest(readTestData, false); + + readTestData.clear(); + + for (int i = 0; i < concurrencyLevel * 5; i++) { + ReadTestData localReadData = new ReadTestData(); + int offset = random.nextInt((1024 * 1024) - 1); + int length = 1024 * 1024 - offset; + byte[] expectedData = new byte[length]; + buffered.reset(); + buffered.skip(offset); + buffered.read(expectedData); + localReadData.set(createTestData.get(0).path, expectedData, offset); + readTestData.add(localReadData); + } + + runReadTest(readTestData, true); + } + + void runReadTest(ArrayList testData, boolean useSameStream) + throws InterruptedException, ExecutionException { + + ExecutorService executor = Executors.newFixedThreadPool(testData.size()); + Future[] subtasks = new Future[testData.size()]; + + for (int i = 0; i < testData.size(); i++) { + subtasks[i] = executor.submit( + new ReadConcurrentRunnable(testData.get(i).data, testData.get(i).path, + testData.get(i).offset, useSameStream)); + } + + executor.shutdown(); + + // wait until all tasks are finished + executor.awaitTermination(120, TimeUnit.SECONDS); + + for (int i = 0; i < testData.size(); ++i) { + Assert.assertTrue((Boolean) subtasks[i].get()); + } + } + + class ReadTestData { + private Path path; + private byte[] data; + private int offset; + + public void set(Path filePath, byte[] dataToBeRead, int fromOffset) { + this.path = filePath; + this.data = dataToBeRead; + this.offset = fromOffset; + } + } + + class CreateTestData { + private Path path; + private byte[] data; + + public void set(Path filePath, byte[] dataToBeWritten) { + this.path = filePath; + this.data = dataToBeWritten; + } + } + + class ReadConcurrentRunnable implements Callable { + private Path path; + private int offset; + private byte[] 
expectedData; + private boolean useSameStream; + + public ReadConcurrentRunnable(byte[] expectedData, Path path, int offset, + boolean useSameStream) { + this.path = path; + this.offset = offset; + this.expectedData = expectedData; + this.useSameStream = useSameStream; + } + + public Boolean call() throws IOException { + try { + FSDataInputStream in; + if (useSameStream) { + synchronized (LOCK) { + if (commonHandle == null) { + commonHandle = getMockAdlFileSystem().open(path); + } + in = commonHandle; + } + } else { + in = getMockAdlFileSystem().open(path); + } + + byte[] actualData = new byte[expectedData.length]; + in.readFully(offset, actualData); + Assert.assertArrayEquals("Path :" + path.toString() + " did not match.", + expectedData, actualData); + if (!useSameStream) { + in.close(); + } + } catch (IOException e) { + e.printStackTrace(); + return false; + } + return true; + } + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestCustomTokenProvider.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestCustomTokenProvider.java new file mode 100644 index 0000000..c594c65 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestCustomTokenProvider.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import com.squareup.okhttp.mockwebserver.MockResponse; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider; +import org.apache.hadoop.fs.permission.FsPermission; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Arrays; +import java.util.Collection; + +import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_BLOCK_SIZE; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .AZURE_AD_TOKEN_PROVIDER_CLASS_KEY; + +/** + * Test access token provider behaviour with custom token provider and for token + * provider cache is enabled. 
+ */ +@RunWith(Parameterized.class) +public class TestCustomTokenProvider extends AdlMockWebServer { + private static final long TEN_MINUTES_IN_MILIS = 600000; + private int backendCallCount; + private int expectedCallbackToAccessToken; + private TestableAdlFileSystem[] fileSystems; + private Class typeOfTokenProviderClass; + private long expiryFromNow; + private int fsObjectCount; + + public TestCustomTokenProvider(Class typeOfTokenProviderClass, + long expiryFromNow, int fsObjectCount, int backendCallCount, + int expectedCallbackToAccessToken) + throws IllegalAccessException, InstantiationException, URISyntaxException, + IOException { + this.typeOfTokenProviderClass = typeOfTokenProviderClass; + this.expiryFromNow = expiryFromNow; + this.fsObjectCount = fsObjectCount; + this.backendCallCount = backendCallCount; + this.expectedCallbackToAccessToken = expectedCallbackToAccessToken; + } + + @Parameterized.Parameters(name = "{index}") + public static Collection testDataForTokenProvider() { + return Arrays.asList(new Object[][] { + // Data set in order + // INPUT - CustomTokenProvider class to load + // INPUT - expiry time in milis. Subtract from current time + // INPUT - No. of FileSystem object + // INPUT - No. of backend calls per FileSystem object + // EXPECTED - Number of callbacks to get token after test finished. + {CustomMockTokenProvider.class, 0, 1, 1, 1}, + {CustomMockTokenProvider.class, TEN_MINUTES_IN_MILIS, 1, 1, 1}, + {CustomMockTokenProvider.class, TEN_MINUTES_IN_MILIS, 2, 1, 2}, + {CustomMockTokenProvider.class, TEN_MINUTES_IN_MILIS, 10, 10, 10}}); + } + + /** + * Explicitly invoked init so that base class mock server is setup before + * test data initialization is done. + * + * @throws IOException + * @throws URISyntaxException + */ + public void init() throws IOException, URISyntaxException { + Configuration configuration = new Configuration(); + configuration.set(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, + typeOfTokenProviderClass.getName()); + fileSystems = new TestableAdlFileSystem[fsObjectCount]; + URI uri = new URI("adl://localhost:" + getPort()); + + for (int i = 0; i < fsObjectCount; ++i) { + fileSystems[i] = new TestableAdlFileSystem(); + fileSystems[i].initialize(uri, configuration); + + ((CustomMockTokenProvider) fileSystems[i].getAzureTokenProvider()) + .setExpiryTimeInMillisAfter(expiryFromNow); + } + } + + @Test + public void testCustomTokenManagement() + throws IOException, URISyntaxException { + int accessTokenCallbackDuringExec = 0; + init(); + for (TestableAdlFileSystem tfs : fileSystems) { + for (int i = 0; i < backendCallCount; ++i) { + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getGetFileStatusJSONResponse())); + FileStatus fileStatus = tfs.getFileStatus(new Path("/test1/test2")); + Assert.assertTrue(fileStatus.isFile()); + Assert.assertEquals("adl://" + getMockServer().getHostName() + ":" + + getMockServer().getPort() + "/test1/test2", + fileStatus.getPath().toString()); + Assert.assertEquals(4194304, fileStatus.getLen()); + Assert.assertEquals(ADL_BLOCK_SIZE, fileStatus.getBlockSize()); + Assert.assertEquals(1, fileStatus.getReplication()); + Assert + .assertEquals(new FsPermission("777"), fileStatus.getPermission()); + Assert.assertEquals("NotSupportYet", fileStatus.getOwner()); + Assert.assertEquals("NotSupportYet", fileStatus.getGroup()); + } + + accessTokenCallbackDuringExec += ((CustomMockTokenProvider) tfs + .getAzureTokenProvider()).getAccessTokenRequestCount(); + } + + 
Assert.assertEquals(expectedCallbackToAccessToken, + accessTokenCallbackDuringExec); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestGetFileStatus.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestGetFileStatus.java new file mode 100644 index 0000000..78ef931 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestGetFileStatus.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl; + +import com.squareup.okhttp.mockwebserver.MockResponse; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Time; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URISyntaxException; + +import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_BLOCK_SIZE; + +/** + * This class is responsible for testing local getFileStatus implementation + * to cover correct parsing of successful and error JSON response + * from the server. + * Adls GetFileStatus operation is in detail covered in + * org.apache.hadoop.fs.adl.live testing package. 
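+ * <p>
+ * The local tests share one pattern, sketched here with the helpers of this
+ * test class (a canned JSON body is queued on the mock server, then the
+ * client call is issued against it):
+ * <pre>{@code
+ *   getMockServer().enqueue(new MockResponse().setResponseCode(200)
+ *       .setBody(TestADLResponseData.getGetFileStatusJSONResponse()));
+ *   FileStatus status = getMockAdlFileSystem()
+ *       .getFileStatus(new Path("/test1/test2"));
+ * }</pre>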
+ */ +public class TestGetFileStatus extends AdlMockWebServer { + private static final Logger LOG = LoggerFactory + .getLogger(TestGetFileStatus.class); + + @Test + public void getFileStatusReturnsAsExpected() + throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getGetFileStatusJSONResponse())); + long startTime = Time.monotonicNow(); + FileStatus fileStatus = getMockAdlFileSystem() + .getFileStatus(new Path("/test1/test2")); + long endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + Assert.assertTrue(fileStatus.isFile()); + Assert.assertEquals("adl://" + getMockServer().getHostName() + ":" + + getMockServer().getPort() + "/test1/test2", + fileStatus.getPath().toString()); + Assert.assertEquals(4194304, fileStatus.getLen()); + Assert.assertEquals(ADL_BLOCK_SIZE, fileStatus.getBlockSize()); + Assert.assertEquals(1, fileStatus.getReplication()); + Assert.assertEquals(new FsPermission("777"), fileStatus.getPermission()); + Assert.assertEquals("NotSupportYet", fileStatus.getOwner()); + Assert.assertEquals("NotSupportYet", fileStatus.getGroup()); + } + + @Test + public void getFileStatusAclBit() + throws URISyntaxException, IOException { + // With ACLBIT set to true + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getGetFileStatusJSONResponse(true))); + long startTime = Time.monotonicNow(); + FileStatus fileStatus = getMockAdlFileSystem() + .getFileStatus(new Path("/test1/test2")); + long endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + Assert.assertTrue(fileStatus.isFile()); + Assert.assertEquals(true, fileStatus.getPermission().getAclBit()); + + // With ACLBIT set to false + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getGetFileStatusJSONResponse(false))); + startTime = Time.monotonicNow(); + fileStatus = getMockAdlFileSystem() + .getFileStatus(new Path("/test1/test2")); + endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + Assert.assertTrue(fileStatus.isFile()); + Assert.assertEquals(false, fileStatus.getPermission().getAclBit()); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestListStatus.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestListStatus.java new file mode 100644 index 0000000..dac8886 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestListStatus.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.hadoop.fs.adl; + +import com.squareup.okhttp.mockwebserver.MockResponse; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.Time; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URISyntaxException; + +/** + * This class is responsible for testing local listStatus implementation to + * cover correct parsing of successful and error JSON response from the server. + * Adls ListStatus functionality is in detail covered in + * org.apache.hadoop.fs.adl.live testing package. + */ +public class TestListStatus extends AdlMockWebServer { + + private static final Logger LOG = LoggerFactory + .getLogger(TestListStatus.class); + + @Test + public void listStatusReturnsAsExpected() throws IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getListFileStatusJSONResponse(10))); + long startTime = Time.monotonicNow(); + FileStatus[] ls = getMockAdlFileSystem() + .listStatus(new Path("/test1/test2")); + long endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + Assert.assertEquals(10, ls.length); + + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getListFileStatusJSONResponse(200))); + startTime = Time.monotonicNow(); + ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2")); + endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + Assert.assertEquals(200, ls.length); + + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getListFileStatusJSONResponse(2048))); + startTime = Time.monotonicNow(); + ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2")); + endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + Assert.assertEquals(2048, ls.length); + } + + @Test + public void listStatusOnFailure() throws IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(403).setBody( + TestADLResponseData.getErrorIllegalArgumentExceptionJSONResponse())); + FileStatus[] ls = null; + long startTime = Time.monotonicNow(); + try { + ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2")); + } catch (IOException e) { + Assert.assertTrue(e.getMessage().contains("Invalid")); + } + long endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + + // SDK may increase number of retry attempts before error is propagated + // to caller. Adding max 10 error responses in the queue to align with SDK. 
+ for (int i = 0; i < 10; ++i) { + getMockServer().enqueue(new MockResponse().setResponseCode(500).setBody( + TestADLResponseData.getErrorInternalServerExceptionJSONResponse())); + } + + startTime = Time.monotonicNow(); + try { + ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2")); + } catch (IOException e) { + Assert.assertTrue(e.getMessage().contains("Internal Server Error")); + } + endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + } + + @Test + public void listStatusAclBit() + throws URISyntaxException, IOException { + // With ACLBIT set to true + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getListFileStatusJSONResponse(true))); + FileStatus[] ls = null; + long startTime = Time.monotonicNow(); + ls = getMockAdlFileSystem() + .listStatus(new Path("/test1/test2")); + long endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + for (int i = 0; i < ls.length; i++) { + Assert.assertTrue(ls[i].isDirectory()); + Assert.assertEquals(true, ls[i].getPermission().getAclBit()); + } + + // With ACLBIT set to false + ls = null; + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getListFileStatusJSONResponse(false))); + startTime = Time.monotonicNow(); + ls = getMockAdlFileSystem() + .listStatus(new Path("/test1/test2")); + endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + for (int i = 0; i < ls.length; i++) { + Assert.assertTrue(ls[i].isDirectory()); + Assert.assertEquals(false, ls[i].getPermission().getAclBit()); + } + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestRelativePathFormation.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestRelativePathFormation.java new file mode 100644 index 0000000..908f8b8 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestRelativePathFormation.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import static org.apache.hadoop.fs.adl.AdlConfKeys + .AZURE_AD_TOKEN_PROVIDER_CLASS_KEY; + +/** + * This class verifies path conversion to SDK. 
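+ * <p>
+ * The mapping exercised below, in short (account name and paths are the
+ * test's own fixtures):
+ * <pre>{@code
+ *   fs.toRelativeFilePath(new Path("/usr"));                       // "/usr"
+ *   fs.toRelativeFilePath(new Path("adl://temp.account.net/usr")); // "/usr"
+ *   fs.setWorkingDirectory(new Path("/a/b/"));
+ *   fs.toRelativeFilePath(new Path("usr"));                        // "/a/b/usr"
+ * }</pre>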
+ */ +public class TestRelativePathFormation { + + @Test + public void testToRelativePath() throws URISyntaxException, IOException { + AdlFileSystem fs = new AdlFileSystem(); + Configuration configuration = new Configuration(); + configuration.set(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, + "org.apache.hadoop.fs.adl.common.CustomMockTokenProvider"); + + fs.initialize(new URI("adl://temp.account.net"), configuration); + + Assert.assertEquals("/usr", fs.toRelativeFilePath(new Path("/usr"))); + Assert.assertEquals("/usr", + fs.toRelativeFilePath(new Path("adl://temp.account.net/usr"))); + + // When working directory is set. + fs.setWorkingDirectory(new Path("/a/b/")); + Assert.assertEquals("/usr", fs.toRelativeFilePath(new Path("/usr"))); + Assert.assertEquals("/a/b/usr", fs.toRelativeFilePath(new Path("usr"))); + Assert.assertEquals("/usr", + fs.toRelativeFilePath(new Path("adl://temp.account.net/usr"))); + Assert.assertEquals("/usr", + fs.toRelativeFilePath(new Path("wasb://temp.account.net/usr"))); + } + +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java new file mode 100644 index 0000000..4cabaa3 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.adl; + +import org.junit.Assert; +import org.junit.Test; + +import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_BLOCK_SIZE; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_ENABLEUPN_FOR_OWNERGROUP_DEFAULT; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_ENABLEUPN_FOR_OWNERGROUP_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_EXPERIMENT_POSITIONAL_READ_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_REPLICATION_FACTOR; +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_ID_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_SECRET_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_TOKEN_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_URL_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .AZURE_AD_TOKEN_PROVIDER_CLASS_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .AZURE_AD_TOKEN_PROVIDER_TYPE_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .DEFAULT_READ_AHEAD_BUFFER_SIZE; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .DEFAULT_WRITE_AHEAD_BUFFER_SIZE; +import static org.apache.hadoop.fs.adl.AdlConfKeys.LATENCY_TRACKER_DEFAULT; +import static org.apache.hadoop.fs.adl.AdlConfKeys.LATENCY_TRACKER_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.READ_AHEAD_BUFFER_SIZE_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .TOKEN_PROVIDER_TYPE_CLIENT_CRED; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .TOKEN_PROVIDER_TYPE_REFRESH_TOKEN; +import static org.apache.hadoop.fs.adl.AdlConfKeys.WRITE_BUFFER_SIZE_KEY; + +/** + * Validate configuration keys defined for adl storage file system instance. 
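+ * <p>
+ * For reference, a client-credential setup would use the keys asserted here
+ * (values below are placeholders, not working credentials or endpoints):
+ * <pre>{@code
+ *   Configuration conf = new Configuration();
+ *   conf.set("dfs.adls.oauth2.access.token.provider.type", "ClientCredential");
+ *   conf.set("dfs.adls.oauth2.refresh.url", "https://login.example.com/token");
+ *   conf.set("dfs.adls.oauth2.client.id", "placeholder-client-id");
+ *   conf.set("dfs.adls.oauth2.credential", "placeholder-client-secret");
+ * }</pre>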
+ */ +public class TestValidateConfiguration { + + @Test + public void validateConfigurationKeys() { + Assert + .assertEquals("dfs.adls.oauth2.refresh.url", AZURE_AD_REFRESH_URL_KEY); + Assert.assertEquals("dfs.adls.oauth2.access.token.provider", + AZURE_AD_TOKEN_PROVIDER_CLASS_KEY); + Assert.assertEquals("dfs.adls.oauth2.client.id", AZURE_AD_CLIENT_ID_KEY); + Assert.assertEquals("dfs.adls.oauth2.refresh.token", + AZURE_AD_REFRESH_TOKEN_KEY); + Assert + .assertEquals("dfs.adls.oauth2.credential", AZURE_AD_CLIENT_SECRET_KEY); + Assert.assertEquals("adl.debug.override.localuserasfileowner", + ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER); + + Assert.assertEquals("dfs.adls.oauth2.access.token.provider.type", + AZURE_AD_TOKEN_PROVIDER_TYPE_KEY); + + Assert.assertEquals("adl.feature.client.cache.readahead", + READ_AHEAD_BUFFER_SIZE_KEY); + + Assert.assertEquals("adl.feature.client.cache.drop.behind.writes", + WRITE_BUFFER_SIZE_KEY); + + Assert.assertEquals("RefreshToken", TOKEN_PROVIDER_TYPE_REFRESH_TOKEN); + + Assert.assertEquals("ClientCredential", TOKEN_PROVIDER_TYPE_CLIENT_CRED); + + Assert.assertEquals("adl.dfs.enable.client.latency.tracker", + LATENCY_TRACKER_KEY); + + Assert.assertEquals(true, LATENCY_TRACKER_DEFAULT); + + Assert.assertEquals(true, ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT); + + Assert.assertEquals("adl.feature.experiment.positional.read.enable", + ADL_EXPERIMENT_POSITIONAL_READ_KEY); + + Assert.assertEquals(1, ADL_REPLICATION_FACTOR); + Assert.assertEquals(256 * 1024 * 1024, ADL_BLOCK_SIZE); + Assert.assertEquals(false, ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT); + Assert.assertEquals(4 * 1024 * 1024, DEFAULT_READ_AHEAD_BUFFER_SIZE); + Assert.assertEquals(4 * 1024 * 1024, DEFAULT_WRITE_AHEAD_BUFFER_SIZE); + + Assert.assertEquals("adl.feature.ownerandgroup.enableupn", + ADL_ENABLEUPN_FOR_OWNERGROUP_KEY); + Assert.assertEquals(false, + ADL_ENABLEUPN_FOR_OWNERGROUP_DEFAULT); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/dtp/DtpHttp2Handler.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestableAdlFileSystem.java similarity index 65% rename from hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/dtp/DtpHttp2Handler.java rename to hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestableAdlFileSystem.java index 5b6f279..4acb39b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/web/dtp/DtpHttp2Handler.java +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestableAdlFileSystem.java @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -14,21 +14,17 @@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. + * */ -package org.apache.hadoop.hdfs.server.datanode.web.dtp; -import org.apache.hadoop.classification.InterfaceAudience; - -import io.netty.handler.codec.http2.Http2ConnectionHandler; +package org.apache.hadoop.fs.adl; /** - * The HTTP/2 handler. + * Mock adl file storage subclass to mock adl storage on local http service. 
*/ -@InterfaceAudience.Private -public class DtpHttp2Handler extends Http2ConnectionHandler { - - public DtpHttp2Handler() { - super(true, new DtpHttp2FrameListener()); - ((DtpHttp2FrameListener) decoder().listener()).encoder(encoder()); +public class TestableAdlFileSystem extends AdlFileSystem { + @Override + protected String getTransportScheme() { + return "http"; } } diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/CustomMockTokenProvider.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/CustomMockTokenProvider.java new file mode 100644 index 0000000..c48ca0e --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/CustomMockTokenProvider.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl.common; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider; + +import java.io.IOException; +import java.util.Date; +import java.util.Random; + +/** + * Custom token management without cache enabled. + */ +public class CustomMockTokenProvider extends AzureADTokenProvider { + private Random random; + private long expiryTime; + private int accessTokenRequestCount = 0; + + @Override + public void initialize(Configuration configuration) throws IOException { + random = new Random(); + } + + @Override + public String getAccessToken() throws IOException { + accessTokenRequestCount++; + return String.valueOf(random.nextInt()); + } + + @Override + public Date getExpiryTime() { + Date before10Min = new Date(); + before10Min.setTime(expiryTime); + return before10Min; + } + + public void setExpiryTimeInMillisAfter(long timeInMillis) { + expiryTime = System.currentTimeMillis() + timeInMillis; + } + + public int getAccessTokenRequestCount() { + return accessTokenRequestCount; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/ExpectedResponse.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/ExpectedResponse.java new file mode 100644 index 0000000..dc8577d --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/ExpectedResponse.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.common; + +import com.squareup.okhttp.mockwebserver.MockResponse; + +import java.util.ArrayList; + +/** + * Supporting class to hold expected MockResponse object along with parameters + * for validation in test methods. + */ +public class ExpectedResponse { + private MockResponse response; + private ArrayList expectedQueryParameters = new ArrayList(); + private int expectedBodySize; + private String httpRequestType; + + public int getExpectedBodySize() { + return expectedBodySize; + } + + public String getHttpRequestType() { + return httpRequestType; + } + + public ArrayList getExpectedQueryParameters() { + return expectedQueryParameters; + } + + public MockResponse getResponse() { + return response; + } + + ExpectedResponse set(MockResponse mockResponse) { + this.response = mockResponse; + return this; + } + + ExpectedResponse addExpectedQueryParam(String param) { + expectedQueryParameters.add(param); + return this; + } + + ExpectedResponse addExpectedBodySize(int bodySize) { + this.expectedBodySize = bodySize; + return this; + } + + ExpectedResponse addExpectedHttpRequestType(String expectedHttpRequestType) { + this.httpRequestType = expectedHttpRequestType; + return this; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/Parallelized.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/Parallelized.java new file mode 100644 index 0000000..b08a892 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/Parallelized.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.common; + +import org.junit.runners.Parameterized; +import org.junit.runners.model.RunnerScheduler; + +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +/** + * Provided for convenience to execute parametrized test cases concurrently. 
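+ * <p>
+ * Usage matches the stock {@code Parameterized} runner; only the runner
+ * class changes. Sketch (test class name and data are illustrative):
+ * <pre>{@code
+ *   @RunWith(Parallelized.class)
+ *   public class SampleParallelTest {
+ *     private final int input;
+ *     public SampleParallelTest(int input) {
+ *       this.input = input;
+ *     }
+ *     @Parameterized.Parameters
+ *     public static Collection<Object[]> data() {
+ *       return Arrays.asList(new Object[][] {{1}, {2}, {3}});
+ *     }
+ *     @Test
+ *     public void testRunsInParallel() {
+ *       Assert.assertTrue(input > 0);
+ *     }
+ *   }
+ * }</pre>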
+ */ +public class Parallelized extends Parameterized { + + public Parallelized(Class classObj) throws Throwable { + super(classObj); + setScheduler(new ThreadPoolScheduler()); + } + + private static class ThreadPoolScheduler implements RunnerScheduler { + private ExecutorService executor; + + public ThreadPoolScheduler() { + int numThreads = 10; + executor = Executors.newFixedThreadPool(numThreads); + } + + public void finished() { + executor.shutdown(); + try { + executor.awaitTermination(10, TimeUnit.MINUTES); + } catch (InterruptedException exc) { + throw new RuntimeException(exc); + } + } + + public void schedule(Runnable childStatement) { + executor.submit(childStatement); + } + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/TestDataForRead.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/TestDataForRead.java new file mode 100644 index 0000000..509b3f0 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/TestDataForRead.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.common; + +import com.squareup.okhttp.mockwebserver.Dispatcher; +import com.squareup.okhttp.mockwebserver.MockResponse; +import com.squareup.okhttp.mockwebserver.RecordedRequest; +import okio.Buffer; +import org.apache.hadoop.fs.adl.TestADLResponseData; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Supporting class for mock test to validate Adls read operation. 
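+ * <p>
+ * It couples the expected file bytes with a mock-server {@code Dispatcher}
+ * that serves GETFILESTATUS and OPEN requests, answering each OPEN with the
+ * slice of the byte array selected by its offset/length query parameters.
+ * Construction sketch (sizes are arbitrary; installing the dispatcher on the
+ * mock server is assumed to be done by the test that uses this class):
+ * <pre>{@code
+ *   byte[] data = new byte[16 * 1024];
+ *   new Random().nextBytes(data);
+ *   TestDataForRead testData = new TestDataForRead(data, 16, 4, false);
+ *   getMockServer().setDispatcher(testData.getDispatcher());
+ * }</pre>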
+ */ +public class TestDataForRead { + private static final Logger LOG = LoggerFactory + .getLogger(TestDataForRead.class); + + private byte[] actualData; + private ArrayList responses; + private Dispatcher dispatcher; + private int intensityOfTest; + private boolean checkOfNoOfCalls; + private int expectedNoNetworkCall; + + public TestDataForRead(final byte[] actualData, int expectedNoNetworkCall, + int intensityOfTest, boolean checkOfNoOfCalls) { + + this.checkOfNoOfCalls = checkOfNoOfCalls; + this.actualData = actualData; + responses = new ArrayList(); + this.expectedNoNetworkCall = expectedNoNetworkCall; + this.intensityOfTest = intensityOfTest; + + dispatcher = new Dispatcher() { + @Override + public MockResponse dispatch(RecordedRequest recordedRequest) + throws InterruptedException { + + if (recordedRequest.getRequestLine().contains("op=GETFILESTATUS")) { + return new MockResponse().setResponseCode(200).setBody( + TestADLResponseData + .getGetFileStatusJSONResponse(actualData.length)); + } + + if (recordedRequest.getRequestLine().contains("op=OPEN")) { + String request = recordedRequest.getRequestLine(); + int offset = 0; + int byteCount = 0; + + Pattern pattern = Pattern.compile("offset=([0-9]+)"); + Matcher matcher = pattern.matcher(request); + if (matcher.find()) { + LOG.debug(matcher.group(1)); + offset = Integer.parseInt(matcher.group(1)); + } + + pattern = Pattern.compile("length=([0-9]+)"); + matcher = pattern.matcher(request); + if (matcher.find()) { + LOG.debug(matcher.group(1)); + byteCount = Integer.parseInt(matcher.group(1)); + } + + Buffer buf = new Buffer(); + buf.write(actualData, offset, + Math.min(actualData.length - offset, byteCount)); + return new MockResponse().setResponseCode(200) + .setChunkedBody(buf, 4 * 1024 * 1024); + } + + return new MockResponse().setBody("NOT SUPPORTED").setResponseCode(501); + } + }; + } + + public boolean isCheckOfNoOfCalls() { + return checkOfNoOfCalls; + } + + public int getExpectedNoNetworkCall() { + return expectedNoNetworkCall; + } + + public int getIntensityOfTest() { + return intensityOfTest; + } + + public byte[] getActualData() { + return actualData; + } + + public ArrayList getResponses() { + return responses; + } + + public Dispatcher getDispatcher() { + return dispatcher; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/AdlStorageConfiguration.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/AdlStorageConfiguration.java new file mode 100644 index 0000000..7d6153d --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/AdlStorageConfiguration.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.adl.AdlFileSystem; +import org.apache.hadoop.util.ReflectionUtils; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +/** + * Configure Adl storage file system. + */ +public final class AdlStorageConfiguration { + static final String CONTRACT_XML = "adls.xml"; + + private static final String CONTRACT_ENABLE_KEY = + "dfs.adl.test.contract.enable"; + private static final boolean CONTRACT_ENABLE_DEFAULT = false; + + private static final String FILE_SYSTEM_KEY = + String.format("test.fs.%s.name", AdlFileSystem.SCHEME); + + private static final String FILE_SYSTEM_IMPL_KEY = + String.format("fs.%s.impl", AdlFileSystem.SCHEME); + private static final Class FILE_SYSTEM_IMPL_DEFAULT = + AdlFileSystem.class; + + private static boolean isContractTestEnabled = false; + private static Configuration conf = null; + + private AdlStorageConfiguration() { + } + + public synchronized static Configuration getConfiguration() { + Configuration newConf = new Configuration(); + newConf.addResource(CONTRACT_XML); + return newConf; + } + + public synchronized static boolean isContractTestEnabled() { + if (conf == null) { + conf = getConfiguration(); + } + + isContractTestEnabled = conf.getBoolean(CONTRACT_ENABLE_KEY, + CONTRACT_ENABLE_DEFAULT); + return isContractTestEnabled; + } + + public synchronized static FileSystem createStorageConnector() + throws URISyntaxException, IOException { + if (conf == null) { + conf = getConfiguration(); + } + + if (!isContractTestEnabled()) { + return null; + } + + String fileSystem = conf.get(FILE_SYSTEM_KEY); + if (fileSystem == null || fileSystem.trim().length() == 0) { + throw new IOException("Default file system not configured."); + } + + Class clazz = conf.getClass(FILE_SYSTEM_IMPL_KEY, + FILE_SYSTEM_IMPL_DEFAULT); + FileSystem fs = (FileSystem) ReflectionUtils.newInstance(clazz, conf); + fs.initialize(new URI(fileSystem), conf); + return fs; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/AdlStorageContract.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/AdlStorageContract.java new file mode 100644 index 0000000..262b636 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/AdlStorageContract.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +import java.io.IOException; +import java.net.URISyntaxException; + +class AdlStorageContract extends AbstractFSContract { + private FileSystem fs; + + protected AdlStorageContract(Configuration conf) { + super(conf); + try { + fs = AdlStorageConfiguration.createStorageConnector(); + } catch (URISyntaxException e) { + throw new IllegalStateException("Can not initialize ADL FileSystem. " + + "Please check test.fs.adl.name property.", e); + } catch (IOException e) { + throw new IllegalStateException("Can not initialize ADL FileSystem.", e); + } + this.setConf(AdlStorageConfiguration.getConfiguration()); + } + + @Override + public String getScheme() { + return "adl"; + } + + @Override + public FileSystem getTestFileSystem() throws IOException { + return this.fs; + } + + @Override + public Path getTestPath() { + return new Path("/test"); + } + + @Override + public boolean isEnabled() { + return AdlStorageConfiguration.isContractTestEnabled(); + } + +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractAppendLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractAppendLive.java new file mode 100644 index 0000000..ffe6dd3 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractAppendLive.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractAppendTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.junit.Test; + +/** + * Test Append on Adl file system. 
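+ * <p>
+ * Like the other live contract tests, this runs only when contract testing
+ * is switched on through the adls.xml test resource (see
+ * AdlStorageConfiguration). Expressed programmatically, the required
+ * settings look roughly like this (the account URI is illustrative):
+ * <pre>{@code
+ *   conf.setBoolean("dfs.adl.test.contract.enable", true);
+ *   conf.set("test.fs.adl.name", "adl://example.azuredatalakestore.net");
+ * }</pre>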
+ */ +public class TestAdlContractAppendLive extends AbstractContractAppendTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } + + @Override + @Test + public void testRenameFileBeingAppended() throws Throwable { + ContractTestUtils.unsupported("Skipping since renaming file in append " + + "mode not supported in Adl"); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractConcatLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractConcatLive.java new file mode 100644 index 0000000..60d30ac --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractConcatLive.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractContractConcatTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.junit.Test; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; + +/** + * Test concat on Adl file system. + */ +public class TestAdlContractConcatLive extends AbstractContractConcatTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } + + @Test + public void testConcatMissingTarget() throws Throwable { + Path testPath = path("test"); + Path zeroByteFile = new Path(testPath, "zero.txt"); + Path target = new Path(testPath, "target"); + touch(getFileSystem(), zeroByteFile); + // Concat on missing target is allowed on Adls file system. + getFileSystem().concat(target, new Path[] {zeroByteFile}); + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractCreateLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractCreateLive.java new file mode 100644 index 0000000..06347e9 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractCreateLive.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractCreateTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Test creating files, overwrite options. + */ +public class TestAdlContractCreateLive extends AbstractContractCreateTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractDeleteLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractDeleteLive.java new file mode 100644 index 0000000..6961f15 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractDeleteLive.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractDeleteTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Test delete contract test. + */ +public class TestAdlContractDeleteLive extends AbstractContractDeleteTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractGetFileStatusLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractGetFileStatusLive.java new file mode 100644 index 0000000..d50dd68 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractGetFileStatusLive.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractGetFileStatusTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Test getFileStatus contract test. + */ +public class TestAdlContractGetFileStatusLive extends + AbstractContractGetFileStatusTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractMkdirLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractMkdirLive.java new file mode 100644 index 0000000..5e760c5 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractMkdirLive.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractMkdirTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Test Mkdir contract on Adl storage file system. + */ +public class TestAdlContractMkdirLive extends AbstractContractMkdirTest { + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new AdlStorageContract(conf); + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractOpenLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractOpenLive.java new file mode 100644 index 0000000..7a35d2c --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractOpenLive.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractOpenTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Test OPEN - read API. + */ +public class TestAdlContractOpenLive extends AbstractContractOpenTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractRenameLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractRenameLive.java new file mode 100644 index 0000000..d72d35e --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractRenameLive.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractRenameTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Test rename contract test cases on Adl file system. + */ +public class TestAdlContractRenameLive extends AbstractContractRenameTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractRootDirLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractRootDirLive.java new file mode 100644 index 0000000..8ebc632 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractRootDirLive.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractRootDirectoryTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Test operation on root level. + */ +public class TestAdlContractRootDirLive + extends AbstractContractRootDirectoryTest { + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractSeekLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractSeekLive.java new file mode 100644 index 0000000..62423b6 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractSeekLive.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractSeekTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Test seek operation on Adl file system. + */ +public class TestAdlContractSeekLive extends AbstractContractSeekTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlDifferentSizeWritesLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlDifferentSizeWritesLive.java new file mode 100644 index 0000000..5421e0b --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlDifferentSizeWritesLive.java @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.adl.common.Parallelized; +import org.junit.Assert; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Random; +import java.util.UUID; + +import static org.apache.hadoop.fs.adl.AdlConfKeys.WRITE_BUFFER_SIZE_KEY; + +/** + * Verify data integrity with different data sizes with buffer size. + */ +@RunWith(Parallelized.class) +public class TestAdlDifferentSizeWritesLive { + private static Random rand = new Random(); + private int totalSize; + private int chunkSize; + + public TestAdlDifferentSizeWritesLive(int totalSize, int chunkSize) { + this.totalSize = totalSize; + this.chunkSize = chunkSize; + } + + public static byte[] getRandomByteArrayData(int size) { + byte[] b = new byte[size]; + rand.nextBytes(b); + return b; + } + + @Parameterized.Parameters(name = "{index}: Data Size [{0}] ; Chunk Size " + + "[{1}]") + public static Collection testDataForIntegrityTest() { + return Arrays.asList( + new Object[][] {{4 * 1024, 1 * 1024}, {4 * 1024, 7 * 1024}, + {4 * 1024, 10}, {2 * 1024, 10}, {1 * 1024, 10}, {100, 1}, + {4 * 1024, 1 * 1024}, {7 * 1024, 2 * 1024}, {9 * 1024, 2 * 1024}, + {10 * 1024, 3 * 1024}, {10 * 1024, 1 * 1024}, + {10 * 1024, 8 * 1024}}); + } + + @BeforeClass + public static void cleanUpParent() throws IOException, URISyntaxException { + if (AdlStorageConfiguration.isContractTestEnabled()) { + Path path = new Path("/test/dataIntegrityCheck/"); + FileSystem fs = AdlStorageConfiguration.createStorageConnector(); + fs.delete(path, true); + } + } + + @Before + public void setup() throws Exception { + org.junit.Assume + .assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + } + + @Test + public void testDataIntegrity() throws IOException { + Path path = new Path( + "/test/dataIntegrityCheck/" + UUID.randomUUID().toString()); + FileSystem fs = null; + AdlStorageConfiguration.getConfiguration() + .setInt(WRITE_BUFFER_SIZE_KEY, 4 * 1024); + try { + fs = AdlStorageConfiguration.createStorageConnector(); + } catch (URISyntaxException e) { + throw new IllegalStateException("Can not initialize ADL FileSystem. 
" + + "Please check test.fs.adl.name property.", e); + } + byte[] expectedData = getRandomByteArrayData(totalSize); + + FSDataOutputStream out = fs.create(path, true); + int iteration = totalSize / chunkSize; + int reminderIteration = totalSize % chunkSize; + int offset = 0; + for (int i = 0; i < iteration; ++i) { + out.write(expectedData, offset, chunkSize); + offset += chunkSize; + } + + out.write(expectedData, offset, reminderIteration); + out.close(); + + byte[] actualData = new byte[totalSize]; + FSDataInputStream in = fs.open(path); + in.readFully(0, actualData); + in.close(); + Assert.assertArrayEquals(expectedData, actualData); + Assert.assertTrue(fs.delete(path, true)); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlFileContextCreateMkdirLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlFileContextCreateMkdirLive.java new file mode 100644 index 0000000..5166de1 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlFileContextCreateMkdirLive.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.DelegateToFileSystem; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FileContextCreateMkdirBaseTest; +import org.apache.hadoop.fs.FileContextTestHelper; +import org.apache.hadoop.fs.FileSystem; +import org.junit.Assume; +import org.junit.BeforeClass; + +import java.net.URI; +import java.util.UUID; + +/** + * Test file context Create/Mkdir operation. + */ +public class TestAdlFileContextCreateMkdirLive + extends FileContextCreateMkdirBaseTest { + private static final String KEY_FILE_SYSTEM = "test.fs.adl.name"; + + @BeforeClass + public static void skipTestCheck() { + Assume.assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + } + + @Override + public void setUp() throws Exception { + Configuration conf = AdlStorageConfiguration.getConfiguration(); + String fileSystem = conf.get(KEY_FILE_SYSTEM); + if (fileSystem == null || fileSystem.trim().length() == 0) { + throw new Exception("Default file system not configured."); + } + URI uri = new URI(fileSystem); + FileSystem fs = AdlStorageConfiguration.createStorageConnector(); + fc = FileContext.getFileContext( + new DelegateToFileSystem(uri, fs, conf, fs.getScheme(), false) { + }, conf); + super.setUp(); + } + + @Override + protected FileContextTestHelper createFileContextHelper() { + // On Windows, root directory path is created from local running directory. + // Adl does not support ':' as part of the path which results in failure. 
+ return new FileContextTestHelper(UUID.randomUUID().toString()); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlFileContextMainOperationsLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlFileContextMainOperationsLive.java new file mode 100644 index 0000000..ee10da7 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlFileContextMainOperationsLive.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.*; +import org.junit.Assume; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.net.URI; +import java.util.UUID; + +import static org.apache.hadoop.util.Shell.WINDOWS; + +/** + * Run collection of tests for the {@link FileContext}. + */ +public class TestAdlFileContextMainOperationsLive + extends FileContextMainOperationsBaseTest { + + private static final String KEY_FILE_SYSTEM = "test.fs.adl.name"; + + @BeforeClass + public static void skipTestCheck() { + Assume.assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + } + + @Override + public void setUp() throws Exception { + Configuration conf = AdlStorageConfiguration.getConfiguration(); + String fileSystem = conf.get(KEY_FILE_SYSTEM); + if (fileSystem == null || fileSystem.trim().length() == 0) { + throw new Exception("Default file system not configured."); + } + URI uri = new URI(fileSystem); + FileSystem fs = AdlStorageConfiguration.createStorageConnector(); + fc = FileContext.getFileContext( + new DelegateToFileSystem(uri, fs, conf, fs.getScheme(), false) { + }, conf); + super.setUp(); + } + + @Override + protected FileContextTestHelper createFileContextHelper() { + // On Windows, root directory path is created from local running directory. + // Adl does not support ':' as part of the path which results in failure. + // return new FileContextTestHelper(GenericTestUtils + // .getRandomizedTestDir() + // .getAbsolutePath().replaceAll(":","")); + return new FileContextTestHelper(UUID.randomUUID().toString()); + } + + @Override + protected boolean listCorruptedBlocksSupported() { + return false; + } + + @Override + public void testWorkingDirectory() throws Exception { + if (WINDOWS) { + // TODO: A fix is required in the Hadoop shell to support Windows + // permission sets. + // The test fails with an NPE on Windows only; on Linux it passes.
+ Assume.assumeTrue(false); + } else { + super.testWorkingDirectory(); + } + } + + @Override + public void testUnsupportedSymlink() throws IOException { + Assume.assumeTrue(false); + } + + @Test + public void testSetVerifyChecksum() throws IOException { + Assume.assumeTrue(false); + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlFileSystemContractLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlFileSystemContractLive.java new file mode 100644 index 0000000..88bacd9 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlFileSystemContractLive.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileSystemContractBaseTest; +import org.apache.hadoop.fs.Path; +import org.junit.Assume; +import org.junit.Before; + +import java.io.IOException; + +/** + * Test Base contract tests on Adl file system. + */ +public class TestAdlFileSystemContractLive extends FileSystemContractBaseTest { + private FileSystem adlStore; + + @Override + protected void setUp() throws Exception { + adlStore = AdlStorageConfiguration.createStorageConnector(); + if (AdlStorageConfiguration.isContractTestEnabled()) { + fs = adlStore; + } + } + + @Override + protected void tearDown() throws Exception { + if (AdlStorageConfiguration.isContractTestEnabled()) { + cleanup(); + adlStore = null; + fs = null; + } + } + + private void cleanup() throws IOException { + adlStore.delete(new Path("/test"), true); + } + + @Override + protected void runTest() throws Throwable { + if (AdlStorageConfiguration.isContractTestEnabled()) { + super.runTest(); + } + } + + @Before + public void skipTestCheck() { + Assume.assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlInternalCreateNonRecursive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlInternalCreateNonRecursive.java new file mode 100644 index 0000000..7e11a54 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlInternalCreateNonRecursive.java @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.adl.common.Parallelized; +import org.apache.hadoop.fs.permission.FsPermission; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.Arrays; +import java.util.Collection; +import java.util.UUID; + +/** + * Test createNonRecursive API. + */ +@RunWith(Parallelized.class) +public class TestAdlInternalCreateNonRecursive { + private Path inputFileName; + private FsPermission inputPermission; + private boolean inputOverride; + private boolean inputFileAlreadyExist; + private boolean inputParentAlreadyExist; + private Class expectedExceptionType; + private FileSystem adlStore; + + public TestAdlInternalCreateNonRecursive(String testScenario, String fileName, + FsPermission permission, boolean override, boolean fileAlreadyExist, + boolean parentAlreadyExist, Class exceptionType) { + + // Random parent path for each test so that parallel execution does not + // affect other running tests. + inputFileName = new Path( + "/test/createNonRecursive/" + UUID.randomUUID().toString(), fileName); + inputPermission = permission; + inputFileAlreadyExist = fileAlreadyExist; + inputOverride = override; + inputParentAlreadyExist = parentAlreadyExist; + expectedExceptionType = exceptionType; + } + + @Parameterized.Parameters(name = "{0}") + public static Collection adlCreateNonRecursiveTestData() + throws UnsupportedEncodingException { + /* + Test Data + File name, Permission, Override flag, File already exists, Parent + already exists + shouldCreateSucceed, expectedExceptionIfFileCreateFails + + File already exists and Parent already exists are mutually exclusive. + */ + return Arrays.asList(new Object[][] { + {"CNR - When file does not exist.", UUID.randomUUID().toString(), + FsPermission.getFileDefault(), false, false, true, null}, + {"CNR - When file exists. Override false", UUID.randomUUID().toString(), + FsPermission.getFileDefault(), false, true, true, + FileAlreadyExistsException.class}, + {"CNR - When file exists. Override true", UUID.randomUUID().toString(), + FsPermission.getFileDefault(), true, true, true, null}, + + //TODO: This test is skipped until the fix is rolled out to production.
+ /*{ "CNR - When parent do no exist.", UUID.randomUUID().toString(), + FsPermission.getFileDefault(), false, false, true, false, + IOException.class }*/}); + } + + @Before + public void setUp() throws Exception { + Assume.assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + adlStore = AdlStorageConfiguration.createStorageConnector(); + } + + @Test + public void testCreateNonRecursiveFunctionality() throws IOException { + if (inputFileAlreadyExist) { + FileSystem.create(adlStore, inputFileName, inputPermission); + } + + // Mutually exclusive to inputFileAlreadyExist + if (inputParentAlreadyExist) { + adlStore.mkdirs(inputFileName.getParent()); + } else { + adlStore.delete(inputFileName.getParent(), true); + } + + try { + adlStore.createNonRecursive(inputFileName, inputPermission, inputOverride, + CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT, + adlStore.getDefaultReplication(inputFileName), + adlStore.getDefaultBlockSize(inputFileName), null); + } catch (IOException e) { + + if (expectedExceptionType == null) { + throw e; + } + + Assert.assertEquals(expectedExceptionType, e.getClass()); + return; + } + + if (expectedExceptionType != null) { + Assert.fail("CreateNonRecursive should have failed with exception " + + expectedExceptionType.getName()); + } + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlPermissionLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlPermissionLive.java new file mode 100644 index 0000000..dd7c10d --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlPermissionLive.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.adl.common.Parallelized; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.junit.*; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.UUID; + +/** + * Test ACL permission on file/folder on Adl file system. 
+ */ +@RunWith(Parallelized.class) +public class TestAdlPermissionLive { + + private static Path testRoot = new Path("/test"); + private FsPermission permission; + private Path path; + private FileSystem adlStore; + + public TestAdlPermissionLive(FsPermission testPermission) { + permission = testPermission; + } + + @Parameterized.Parameters(name = "{0}") + public static Collection adlCreateNonRecursiveTestData() + throws UnsupportedEncodingException { + /* + Test Data + File/Folder name, User permission, Group permission, Other Permission, + Parent already exist + shouldCreateSucceed, expectedExceptionIfFileCreateFails + */ + final Collection datas = new ArrayList<>(); + for (FsAction g : FsAction.values()) { + for (FsAction o : FsAction.values()) { + datas.add(new Object[] {new FsPermission(FsAction.ALL, g, o)}); + } + } + return datas; + } + + @AfterClass + public static void cleanUp() throws IOException, URISyntaxException { + if (AdlStorageConfiguration.isContractTestEnabled()) { + Assert.assertTrue(AdlStorageConfiguration.createStorageConnector() + .delete(testRoot, true)); + } + } + + @Before + public void setUp() throws Exception { + Assume.assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + adlStore = AdlStorageConfiguration.createStorageConnector(); + } + + @Test + public void testFilePermission() throws IOException { + path = new Path(testRoot, UUID.randomUUID().toString()); + adlStore.getConf() + .set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000"); + + adlStore.mkdirs(path.getParent(), + new FsPermission(FsAction.ALL, FsAction.WRITE, FsAction.NONE)); + adlStore.removeDefaultAcl(path.getParent()); + + adlStore.create(path, permission, true, 1024, (short) 1, 1023, null); + FileStatus status = adlStore.getFileStatus(path); + Assert.assertEquals(permission, status.getPermission()); + } + + @Test + public void testFolderPermission() throws IOException { + path = new Path(testRoot, UUID.randomUUID().toString()); + adlStore.getConf() + .set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "000"); + adlStore.mkdirs(path.getParent(), + new FsPermission(FsAction.ALL, FsAction.WRITE, FsAction.NONE)); + adlStore.removeDefaultAcl(path.getParent()); + + adlStore.mkdirs(path, permission); + FileStatus status = adlStore.getFileStatus(path); + Assert.assertEquals(permission, status.getPermission()); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlSupportedCharsetInPath.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlSupportedCharsetInPath.java new file mode 100644 index 0000000..f94fa1b --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlSupportedCharsetInPath.java @@ -0,0 +1,336 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.adl.common.Parallelized; +import org.junit.AfterClass; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URISyntaxException; +import java.util.*; + +/** + * Test supported ASCII, UTF-8 character set supported by Adl storage file + * system on file/folder operation. + */ +@RunWith(Parallelized.class) +public class TestAdlSupportedCharsetInPath { + + private static final String TEST_ROOT = "/test/"; + private static final Logger LOG = LoggerFactory + .getLogger(TestAdlSupportedCharsetInPath.class); + private String path; + + public TestAdlSupportedCharsetInPath(String filePath) { + path = filePath; + } + + @Parameterized.Parameters(name = "{0}") + public static Collection adlCharTestData() + throws UnsupportedEncodingException { + + ArrayList filePathList = new ArrayList<>(); + for (int i = 32; i < 127; ++i) { + String specialChar = (char) i + ""; + if (i >= 48 && i <= 57) { + continue; + } + + if (i >= 65 && i <= 90) { + continue; + } + + if (i >= 97 && i <= 122) { + continue; + } + + // Special char at start of the path + if (i != 92 && i != 58 && i != 46 && i != 47) { + filePathList.add(specialChar + ""); + } + + // Special char at end of string + if (i != 92 && i != 47 && i != 58) { + filePathList.add("file " + i + " " + specialChar); + } + + // Special char in between string + if (i != 47 && i != 58 && i != 92) { + filePathList.add("file " + i + " " + specialChar + "_name"); + } + } + + filePathList.add("a "); + filePathList.add("a..b"); + fillUnicodes(filePathList); + Collection result = new ArrayList<>(); + for (String item : filePathList) { + result.add(new Object[] {item}); + } + return result; + } + + private static void fillUnicodes(ArrayList filePathList) { + // Unicode characters + filePathList.add("البيانات الكبيرة"); // Arabic + filePathList.add("Të dhënat i madh"); // Albanian + filePathList.add("մեծ տվյալները"); // Armenian + filePathList.add("böyük data"); // Azerbaijani + filePathList.add("вялікія дадзеныя"); // Belarusian, + filePathList.add("বিগ ডেটা"); // Bengali + filePathList.add("veliki podataka"); // Bosnian + filePathList.add("голяма данни"); // Bulgarian + filePathList.add("大数据"); // Chinese - Simplified + filePathList.add("大數據"); // Chinese - Traditional + filePathList.add("დიდი მონაცემთა"); // Georgian, + filePathList.add("große Daten"); // German + filePathList.add("μεγάλο δεδομένα"); // Greek + filePathList.add("મોટા માહિતી"); // Gujarati + filePathList.add("נתונים גדולים"); // Hebrew + filePathList.add("बड़ा डेटा"); // Hindi + filePathList.add("stór gögn"); // Icelandic + filePathList.add("sonraí mór"); // Irish + filePathList.add("ビッグデータ"); // Japanese + filePathList.add("үлкен деректер"); // Kazakh + filePathList.add("ទិន្នន័យធំ"); // Khmer + filePathList.add("빅 데이터"); // Korean + filePathList.add("ຂໍ້ມູນ ຂະຫນາດໃຫຍ່"); // Lao + filePathList.add("големи податоци"); // Macedonian + filePathList.add("ठूलो डाटा"); // Nepali + filePathList.add("വലിയ ഡാറ്റ"); // Malayalam + filePathList.add("मोठे डेटा"); // 
Marathi + filePathList.add("том мэдээлэл"); // Mangolian + filePathList.add("اطلاعات بزرگ"); // Persian + filePathList.add("ਵੱਡੇ ਡਾਟੇ ਨੂੰ"); // Punjabi + filePathList.add("большие данные"); // Russian + filePathList.add("Велики података"); // Serbian + filePathList.add("විශාල දත්ත"); // Sinhala + filePathList.add("big dát"); // Slovak + filePathList.add("маълумоти калон"); // Tajik + filePathList.add("பெரிய தரவு"); // Tamil + filePathList.add("పెద్ద డేటా"); // Telugu + filePathList.add("ข้อมูลใหญ่"); // Thai + filePathList.add("büyük veri"); // Turkish + filePathList.add("великі дані"); // Ukranian + filePathList.add("بڑے اعداد و شمار"); // Urdu + filePathList.add("katta ma'lumotlar"); // Uzbek + filePathList.add("dữ liệu lớn"); // Vietanamese + filePathList.add("גרויס דאַטן"); // Yiddish + filePathList.add("big idatha"); // Zulu + filePathList.add("rachelχ"); + filePathList.add("jessicaο"); + filePathList.add("sarahδ"); + filePathList.add("katieν"); + filePathList.add("wendyξ"); + filePathList.add("davidμ"); + filePathList.add("priscillaυ"); + filePathList.add("oscarθ"); + filePathList.add("xavierχ"); + filePathList.add("gabriellaθ"); + filePathList.add("davidυ"); + filePathList.add("ireneμ"); + filePathList.add("fredρ"); + filePathList.add("davidτ"); + filePathList.add("ulyssesν"); + filePathList.add("gabriellaμ"); + filePathList.add("zachζ"); + filePathList.add("gabriellaλ"); + filePathList.add("ulyssesφ"); + filePathList.add("davidχ"); + filePathList.add("sarahσ"); + filePathList.add("hollyψ"); + filePathList.add("nickα"); + filePathList.add("ulyssesι"); + filePathList.add("mikeβ"); + filePathList.add("priscillaκ"); + filePathList.add("wendyθ"); + filePathList.add("jessicaς"); + filePathList.add("fredχ"); + filePathList.add("fredζ"); + filePathList.add("sarahκ"); + filePathList.add("calvinη"); + filePathList.add("xavierχ"); + filePathList.add("yuriχ"); + filePathList.add("ethanλ"); + filePathList.add("hollyε"); + filePathList.add("xavierσ"); + filePathList.add("victorτ"); + filePathList.add("wendyβ"); + filePathList.add("jessicaς"); + filePathList.add("quinnφ"); + filePathList.add("xavierυ"); + filePathList.add("nickι"); + filePathList.add("rachelφ"); + filePathList.add("oscarξ"); + filePathList.add("zachδ"); + filePathList.add("zachλ"); + filePathList.add("rachelα"); + filePathList.add("jessicaφ"); + filePathList.add("lukeφ"); + filePathList.add("tomζ"); + filePathList.add("nickξ"); + filePathList.add("nickκ"); + filePathList.add("ethanδ"); + filePathList.add("fredχ"); + filePathList.add("priscillaθ"); + filePathList.add("zachξ"); + filePathList.add("xavierξ"); + filePathList.add("zachψ"); + filePathList.add("ethanα"); + filePathList.add("oscarι"); + filePathList.add("ireneδ"); + filePathList.add("ireneζ"); + filePathList.add("victorο"); + filePathList.add("wendyβ"); + filePathList.add("mikeσ"); + filePathList.add("fredο"); + filePathList.add("mikeη"); + filePathList.add("sarahρ"); + filePathList.add("quinnβ"); + filePathList.add("mikeυ"); + filePathList.add("nickζ"); + filePathList.add("nickο"); + filePathList.add("tomκ"); + filePathList.add("bobλ"); + filePathList.add("yuriπ"); + filePathList.add("davidτ"); + filePathList.add("quinnπ"); + filePathList.add("mikeλ"); + filePathList.add("davidη"); + filePathList.add("ethanτ"); + filePathList.add("nickφ"); + filePathList.add("yuriο"); + filePathList.add("ethanυ"); + filePathList.add("bobθ"); + filePathList.add("davidλ"); + filePathList.add("priscillaξ"); + filePathList.add("nickγ"); + filePathList.add("lukeυ"); + 
filePathList.add("ireneλ"); + filePathList.add("xavierο"); + filePathList.add("fredυ"); + filePathList.add("ulyssesμ"); + filePathList.add("wendyγ"); + filePathList.add("zachλ"); + filePathList.add("rachelς"); + filePathList.add("sarahπ"); + filePathList.add("aliceψ"); + filePathList.add("bobτ"); + } + + @AfterClass + public static void testReport() throws IOException, URISyntaxException { + if (!AdlStorageConfiguration.isContractTestEnabled()) { + return; + } + + FileSystem fs = AdlStorageConfiguration.createStorageConnector(); + fs.delete(new Path(TEST_ROOT), true); + } + + @Test + public void testAllowedSpecialCharactersMkdir() + throws IOException, URISyntaxException { + Path parentPath = new Path(TEST_ROOT, UUID.randomUUID().toString() + "/"); + Path specialFile = new Path(parentPath, path); + FileSystem fs = AdlStorageConfiguration.createStorageConnector(); + + Assert.assertTrue("Mkdir failed : " + specialFile, fs.mkdirs(specialFile)); + Assert.assertTrue("File not Found after Mkdir success" + specialFile, + fs.exists(specialFile)); + Assert.assertTrue("Not listed under parent " + parentPath, + contains(fs.listStatus(parentPath), + fs.makeQualified(specialFile).toString())); + Assert.assertTrue("Delete failed : " + specialFile, + fs.delete(specialFile, true)); + Assert.assertFalse("File still exist after delete " + specialFile, + fs.exists(specialFile)); + } + + private boolean contains(FileStatus[] statuses, String remotePath) { + for (FileStatus status : statuses) { + if (status.getPath().toString().equals(remotePath)) { + return true; + } + } + + for (FileStatus status : statuses) { + LOG.info(status.getPath().toString()); + } + return false; + } + + @Before + public void setup() throws Exception { + org.junit.Assume + .assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + } + + @Test + public void testAllowedSpecialCharactersRename() + throws IOException, URISyntaxException { + + String parentPath = TEST_ROOT + UUID.randomUUID().toString() + "/"; + Path specialFile = new Path(parentPath + path); + Path anotherLocation = new Path(parentPath + UUID.randomUUID().toString()); + FileSystem fs = AdlStorageConfiguration.createStorageConnector(); + + Assert.assertTrue("Could not create " + specialFile.toString(), + fs.createNewFile(specialFile)); + Assert.assertTrue( + "Failed to rename " + specialFile.toString() + " --> " + anotherLocation + .toString(), fs.rename(specialFile, anotherLocation)); + Assert.assertFalse("File should not be present after successful rename : " + + specialFile.toString(), fs.exists(specialFile)); + Assert.assertTrue("File should be present after successful rename : " + + anotherLocation.toString(), fs.exists(anotherLocation)); + Assert.assertFalse( + "Listed under parent whereas expected not listed : " + parentPath, + contains(fs.listStatus(new Path(parentPath)), + fs.makeQualified(specialFile).toString())); + + Assert.assertTrue( + "Failed to rename " + anotherLocation.toString() + " --> " + specialFile + .toString(), fs.rename(anotherLocation, specialFile)); + Assert.assertTrue( + "File should be present after successful rename : " + "" + specialFile + .toString(), fs.exists(specialFile)); + Assert.assertFalse("File should not be present after successful rename : " + + anotherLocation.toString(), fs.exists(anotherLocation)); + + Assert.assertTrue("Not listed under parent " + parentPath, + contains(fs.listStatus(new Path(parentPath)), + fs.makeQualified(specialFile).toString())); + + Assert.assertTrue("Failed to delete " + parentPath, + fs.delete(new 
Path(parentPath), true)); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestMetadata.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestMetadata.java new file mode 100644 index 0000000..dbcaa39 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestMetadata.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.adl.AdlFileSystem; +import org.junit.After; +import org.junit.Assert; +import org.junit.Assume; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.UUID; + +import static org.junit.Assert.fail; + +/** + * This class is responsible for testing ContentSummary, ListStatus on + * file/folder. 
+ */ +public class TestMetadata { + + private FileSystem adlStore; + private Path parent; + + public TestMetadata() { + parent = new Path("test"); + } + + @Before + public void setUp() throws Exception { + Assume.assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + adlStore = AdlStorageConfiguration.createStorageConnector(); + } + + @After + public void cleanUp() throws Exception { + if (AdlStorageConfiguration.isContractTestEnabled()) { + adlStore.delete(parent, true); + } + } + + @Test + public void testContentSummaryOnFile() throws IOException { + Path child = new Path(UUID.randomUUID().toString()); + Path testFile = new Path(parent, child); + OutputStream out = adlStore.create(testFile); + + for (int i = 0; i < 1024; ++i) { + out.write(97); + } + out.close(); + + Assert.assertTrue(adlStore.isFile(testFile)); + ContentSummary summary = adlStore.getContentSummary(testFile); + Assert.assertEquals(1024, summary.getSpaceConsumed()); + Assert.assertEquals(1, summary.getFileCount()); + Assert.assertEquals(0, summary.getDirectoryCount()); + Assert.assertEquals(1024, summary.getLength()); + } + + @Test + public void testContentSummaryOnFolder() throws IOException { + Path child = new Path(UUID.randomUUID().toString()); + Path testFile = new Path(parent, child); + OutputStream out = adlStore.create(testFile); + + for (int i = 0; i < 1024; ++i) { + out.write(97); + } + out.close(); + + Assert.assertTrue(adlStore.isFile(testFile)); + ContentSummary summary = adlStore.getContentSummary(parent); + Assert.assertEquals(1024, summary.getSpaceConsumed()); + Assert.assertEquals(1, summary.getFileCount()); + Assert.assertEquals(1, summary.getDirectoryCount()); + Assert.assertEquals(1024, summary.getLength()); + } + + @Test + public void listStatusOnFile() throws IOException { + Path path = new Path(parent, "a.txt"); + FileSystem fs = adlStore; + fs.createNewFile(path); + Assert.assertTrue(fs.isFile(path)); + FileStatus[] statuses = fs.listStatus(path); + Assert + .assertEquals(path.makeQualified(fs.getUri(), fs.getWorkingDirectory()), + statuses[0].getPath()); + } + + @Test + public void testUserRepresentationConfiguration() throws IOException { + // Validating the actual user/group OID or friendly name is outside the + // scope of this test. + Path path = new Path(parent, "a.txt"); + AdlFileSystem fs = (AdlFileSystem) adlStore; + + // When set to false, User/Group information should be the AAD-assigned + // unique OID, i.e. a GUID value. + fs.setUserGroupRepresentationAsUPN(false); + fs.createNewFile(path); + Assert.assertTrue(fs.isFile(path)); + FileStatus fileStatus = fs.getFileStatus(path); + UUID.fromString(fileStatus.getGroup()); + UUID.fromString(fileStatus.getOwner()); + + // When set to true, User/Group information should be the user-friendly + // name (UPN), i.e. a non-GUID value. + // In the majority of cases, the user-friendly name is not a GUID value.
+ fs.setUserGroupRepresentationAsUPN(true); + fileStatus = fs.getFileStatus(path); + try { + UUID.fromString(fileStatus.getGroup()); + UUID.fromString(fileStatus.getOwner()); + fail("Expected user friendly name to be non guid value."); + } catch (IllegalArgumentException e) { + // expected to fail since + } + } +} + diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/resources/adls.xml b/hadoop-tools/hadoop-azure-datalake/src/test/resources/adls.xml new file mode 100644 index 0000000..5bbdd6f --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/resources/adls.xml @@ -0,0 +1,151 @@ + + + + + + + + + fs.contract.test.root-tests-enabled + true + + + + fs.contract.test.supports-concat + true + + + + fs.contract.rename-returns-false-if-source-missing + true + + + + fs.contract.test.random-seek-count + 10 + + + + fs.contract.is-case-sensitive + true + + + + fs.contract.rename-returns-true-if-dest-exists + false + + + + fs.contract.rename-returns-true-if-source-missing + false + + + + fs.contract.rename-creates-dest-dirs + false + + + + fs.contract.rename-remove-dest-if-empty-dir + false + + + + fs.contract.supports-settimes + true + + + + fs.contract.supports-append + true + + + + fs.contract.supports-atomic-directory-delete + true + + + + fs.contract.supports-atomic-rename + true + + + + fs.contract.supports-block-locality + true + + + + fs.contract.supports-concat + true + + + + fs.contract.supports-seek + true + + + + fs.contract.supports-seek-on-closed-file + true + + + + fs.contract.rejects-seek-past-eof + true + + + + fs.contract.supports-available-on-closed-file + true + + + + fs.contract.supports-strict-exceptions + false + + + + fs.contract.supports-unix-permissions + true + + + + fs.contract.rename-overwrites-dest + false + + + + fs.contract.supports-append + true + + + + fs.azure.enable.append.support + true + + + + fs.contract.supports-getfilestatus + true + + + diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/resources/log4j.properties b/hadoop-tools/hadoop-azure-datalake/src/test/resources/log4j.properties new file mode 100644 index 0000000..4cc8f7f --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/resources/log4j.properties @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +log4j.rootLogger=DEBUG,stdout +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d [%t] %-5p %X{file} %c{1} - %m%n +log4j.logger.your.app=* +log4j.additivity.your.app=false +log4j.logger.yourApp=* +log4j.additivity.yourApp=false +log4j.appender.yourApp=org.apache.log4j.ConsoleAppender +log4j.appender.yourApp.layout=org.apache.log4j.PatternLayout +log4j.appender.yourApp.layout.ConversionPattern=%d [%t] %-5p %X{file} %c{1} %m%n +log4j.appender.yourApp.ImmediateFlush=true \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/pom.xml b/hadoop-tools/hadoop-azure/pom.xml index 5673cd2..5076c93 100644 --- a/hadoop-tools/hadoop-azure/pom.xml +++ b/hadoop-tools/hadoop-azure/pom.xml @@ -19,7 +19,7 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project hadoop-azure diff --git a/hadoop-tools/hadoop-datajoin/pom.xml b/hadoop-tools/hadoop-datajoin/pom.xml index 24f8be7..08bf442 100644 --- a/hadoop-tools/hadoop-datajoin/pom.xml +++ b/hadoop-tools/hadoop-datajoin/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-datajoin - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Data Join Apache Hadoop Data Join jar diff --git a/hadoop-tools/hadoop-distcp/pom.xml b/hadoop-tools/hadoop-distcp/pom.xml index 1ae01d4..d3ab5bb 100644 --- a/hadoop-tools/hadoop-distcp/pom.xml +++ b/hadoop-tools/hadoop-distcp/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-distcp - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Distributed Copy Apache Hadoop Distributed Copy jar diff --git a/hadoop-tools/hadoop-extras/pom.xml b/hadoop-tools/hadoop-extras/pom.xml index ba6f9c3..a23e3dd 100644 --- a/hadoop-tools/hadoop-extras/pom.xml +++ b/hadoop-tools/hadoop-extras/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-extras - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Extras Apache Hadoop Extras jar diff --git a/hadoop-tools/hadoop-gridmix/pom.xml b/hadoop-tools/hadoop-gridmix/pom.xml index 1832191..a3f5389 100644 --- a/hadoop-tools/hadoop-gridmix/pom.xml +++ b/hadoop-tools/hadoop-gridmix/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-gridmix - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Gridmix Apache Hadoop Gridmix jar diff --git a/hadoop-tools/hadoop-openstack/pom.xml b/hadoop-tools/hadoop-openstack/pom.xml index 84dd7eb..c3870fa 100644 --- a/hadoop-tools/hadoop-openstack/pom.xml +++ b/hadoop-tools/hadoop-openstack/pom.xml @@ -19,11 +19,11 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project hadoop-openstack - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop OpenStack support This module contains code to support integration with OpenStack. 
diff --git a/hadoop-tools/hadoop-pipes/pom.xml b/hadoop-tools/hadoop-pipes/pom.xml index 2f4661d..29991c4 100644 --- a/hadoop-tools/hadoop-pipes/pom.xml +++ b/hadoop-tools/hadoop-pipes/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-pipes - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Pipes Apache Hadoop Pipes pom diff --git a/hadoop-tools/hadoop-rumen/pom.xml b/hadoop-tools/hadoop-rumen/pom.xml index 382a338..c0bdaf4 100644 --- a/hadoop-tools/hadoop-rumen/pom.xml +++ b/hadoop-tools/hadoop-rumen/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-rumen - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Rumen Apache Hadoop Rumen jar diff --git a/hadoop-tools/hadoop-sls/pom.xml b/hadoop-tools/hadoop-sls/pom.xml index c61725c..a5d39e1 100644 --- a/hadoop-tools/hadoop-sls/pom.xml +++ b/hadoop-tools/hadoop-sls/pom.xml @@ -19,12 +19,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-sls - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Scheduler Load Simulator Apache Hadoop Scheduler Load Simulator jar diff --git a/hadoop-tools/hadoop-streaming/pom.xml b/hadoop-tools/hadoop-streaming/pom.xml index c63d142..f4531e1 100644 --- a/hadoop-tools/hadoop-streaming/pom.xml +++ b/hadoop-tools/hadoop-streaming/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-streaming - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop MapReduce Streaming Apache Hadoop MapReduce Streaming jar diff --git a/hadoop-tools/hadoop-tools-dist/pom.xml b/hadoop-tools/hadoop-tools-dist/pom.xml index f19f313..20cc564 100644 --- a/hadoop-tools/hadoop-tools-dist/pom.xml +++ b/hadoop-tools/hadoop-tools-dist/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project-dist - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project-dist org.apache.hadoop hadoop-tools-dist - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Tools Dist Apache Hadoop Tools Dist jar diff --git a/hadoop-tools/pom.xml b/hadoop-tools/pom.xml index f85b1d9..fe1136b 100644 --- a/hadoop-tools/pom.xml +++ b/hadoop-tools/pom.xml @@ -20,12 +20,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../hadoop-project org.apache.hadoop hadoop-tools - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Tools Apache Hadoop Tools pom @@ -46,6 +46,7 @@ hadoop-sls hadoop-aws hadoop-azure + hadoop-azure-datalake diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml index 41df09e..f358741 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml @@ -19,12 +19,12 @@ hadoop-yarn org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-yarn-api - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN API diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java index ad526d6..b2d765a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/ApplicationConstants.java @@ -171,13 +171,6 @@ LD_LIBRARY_PATH("LD_LIBRARY_PATH"), /** - * 
$YARN_RESOURCEMANAGER_APPLICATION_QUEUE - * The queue into which the app was submitted/launched. - */ - YARN_RESOURCEMANAGER_APPLICATION_QUEUE( - "YARN_RESOURCEMANAGER_APPLICATION_QUEUE"), - - /** * $HADOOP_CONF_DIR * Final, non-modifiable. */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetLabelsToNodesResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetLabelsToNodesResponse.java index da2be28..ef0bf60 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetLabelsToNodesResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetLabelsToNodesResponse.java @@ -29,7 +29,7 @@ public abstract class GetLabelsToNodesResponse { public static GetLabelsToNodesResponse newInstance( - Map> map) { + Map> map) { GetLabelsToNodesResponse response = Records.newRecord(GetLabelsToNodesResponse.class); response.setLabelsToNodes(map); @@ -38,9 +38,9 @@ public static GetLabelsToNodesResponse newInstance( @Public @Evolving - public abstract void setLabelsToNodes(Map> map); + public abstract void setLabelsToNodes(Map> map); @Public @Evolving - public abstract Map> getLabelsToNodes(); + public abstract Map> getLabelsToNodes(); } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNodesToLabelsResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNodesToLabelsResponse.java index 432485c..bcd5421 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNodesToLabelsResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/GetNodesToLabelsResponse.java @@ -24,12 +24,11 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.yarn.api.records.NodeId; -import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.util.Records; public abstract class GetNodesToLabelsResponse { public static GetNodesToLabelsResponse newInstance( - Map> map) { + Map> map) { GetNodesToLabelsResponse response = Records.newRecord(GetNodesToLabelsResponse.class); response.setNodeToLabels(map); @@ -38,9 +37,9 @@ public static GetNodesToLabelsResponse newInstance( @Public @Evolving - public abstract void setNodeToLabels(Map> map); + public abstract void setNodeToLabels(Map> map); @Public @Evolving - public abstract Map> getNodeToLabels(); + public abstract Map> getNodeToLabels(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AddToClusterNodeLabelsRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AddToClusterNodeLabelsRequest.java index c1ea07d..f2ac395 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AddToClusterNodeLabelsRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/AddToClusterNodeLabelsRequest.java @@ -30,17 +30,16 @@ public abstract 
class AddToClusterNodeLabelsRequest { @Public @Unstable - public static AddToClusterNodeLabelsRequest newInstance( - List NodeLabels) { - AddToClusterNodeLabelsRequest request = - Records.newRecord(AddToClusterNodeLabelsRequest.class); - request.setNodeLabels(NodeLabels); + public static AddToClusterNodeLabelsRequest newInstance(List nodeLabels) { + AddToClusterNodeLabelsRequest request = Records + .newRecord(AddToClusterNodeLabelsRequest.class); + request.setNodeLabels(nodeLabels); return request; } @Public @Unstable - public abstract void setNodeLabels(List NodeLabels); + public abstract void setNodeLabels(List nodeLabels); @Public @Unstable diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto index baacda1..e728fc8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto @@ -83,7 +83,8 @@ message RefreshNodesResourcesResponseProto { } message AddToClusterNodeLabelsRequestProto { - repeated NodeLabelProto nodeLabels = 1; + repeated string deprecatedNodeLabels = 1; + repeated NodeLabelProto nodeLabels = 2; } message AddToClusterNodeLabelsResponseProto { @@ -97,7 +98,7 @@ message RemoveFromClusterNodeLabelsResponseProto { } message ReplaceLabelsOnNodeRequestProto { - repeated NodeIdToLabelsNameProto nodeToLabels = 1; + repeated NodeIdToLabelsProto nodeToLabels = 1; optional bool failOnUnknownNodes = 2; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index cefaf55..8c847b2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -267,13 +267,13 @@ message NodeReportProto { optional ResourceUtilizationProto node_utilization = 12; } -message NodeIdToLabelsInfoProto { +message NodeIdToLabelsProto { optional NodeIdProto nodeId = 1; - repeated NodeLabelProto nodeLabels = 2; + repeated string nodeLabels = 2; } message LabelsToNodeIdsProto { - optional NodeLabelProto nodeLabels = 1; + optional string nodeLabels = 1; repeated NodeIdProto nodeId = 2; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index 4994d23..0a379a7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -232,7 +232,7 @@ message GetNodesToLabelsRequestProto { } message GetNodesToLabelsResponseProto { - repeated NodeIdToLabelsInfoProto nodeToLabels = 1; + repeated NodeIdToLabelsProto nodeToLabels = 1; } message GetLabelsToNodesRequestProto { @@ -247,7 +247,8 @@ message GetClusterNodeLabelsRequestProto { } message GetClusterNodeLabelsResponseProto { - repeated NodeLabelProto nodeLabels = 1; + repeated string deprecatedNodeLabels = 1; + repeated NodeLabelProto nodeLabels = 2; } message UpdateApplicationPriorityRequestProto { diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml index d6e33cb..9349b95 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml @@ -19,12 +19,12 @@ hadoop-yarn-applications org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-yarn-applications-distributedshell - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN DistributedShell diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/pom.xml index ff9020b..5e0cd0a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/pom.xml @@ -19,12 +19,12 @@ hadoop-yarn-applications org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-yarn-applications-unmanaged-am-launcher - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN Unmanaged Am Launcher diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml index 391856a..9cab924 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml @@ -19,12 +19,12 @@ hadoop-yarn org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-yarn-applications - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN Applications pom diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml index 454993e..a095de5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml @@ -17,11 +17,11 @@ hadoop-yarn org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 org.apache.hadoop hadoop-yarn-client - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN Client diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java index f307882..44d087d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/YarnClient.java @@ -714,7 +714,7 @@ public abstract ReservationListResponse listReservations( */ @Public @Unstable - public abstract Map> getNodeToLabels() + public abstract Map> getNodeToLabels() throws YarnException, IOException; /** @@ -729,7 +729,7 @@ public abstract ReservationListResponse listReservations( */ @Public @Unstable - public abstract Map> getLabelsToNodes() + public abstract Map> getLabelsToNodes() throws YarnException, IOException; /** @@ -745,7 +745,7 @@ public abstract ReservationListResponse listReservations( */ @Public @Unstable - public abstract Map> getLabelsToNodes( + public abstract Map> getLabelsToNodes( Set labels) throws YarnException, 
IOException; /** diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java index 6d57994..f2a297f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java @@ -851,21 +851,21 @@ public ReservationListResponse listReservations( } @Override - public Map> getNodeToLabels() throws YarnException, + public Map> getNodeToLabels() throws YarnException, IOException { return rmClient.getNodeToLabels(GetNodesToLabelsRequest.newInstance()) .getNodeToLabels(); } @Override - public Map> getLabelsToNodes() throws YarnException, + public Map> getLabelsToNodes() throws YarnException, IOException { return rmClient.getLabelsToNodes(GetLabelsToNodesRequest.newInstance()) .getLabelsToNodes(); } @Override - public Map> getLabelsToNodes(Set labels) + public Map> getLabelsToNodes(Set labels) throws YarnException, IOException { return rmClient.getLabelsToNodes( GetLabelsToNodesRequest.newInstance(labels)).getLabelsToNodes(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java index 314ca0a..64e63be 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestAMRMClient.java @@ -39,7 +39,7 @@ import java.util.TreeSet; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.Credentials; @@ -133,6 +133,11 @@ public static void setup() throws Exception { // set the minimum allocation so that resource decrease can go under 1024 conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 512); conf.setLong(YarnConfiguration.NM_LOG_RETAIN_SECONDS, 1); + createClientAndCluster(conf); + } + + private static void createClientAndCluster(Configuration conf) + throws Exception { yarnCluster = new MiniYARNCluster(TestAMRMClient.class.getName(), nodeCount, 1, 1); yarnCluster.init(conf); yarnCluster.start(); @@ -656,6 +661,29 @@ private int getAllocatedContainersNumber( @Test (timeout=60000) public void testAMRMClient() throws YarnException, IOException { + registerAndAllocate(); + } + + @Test (timeout=60000) + public void testAMRMClientWithSaslEncryption() throws Exception { + conf.set(CommonConfigurationKeysPublic.HADOOP_RPC_PROTECTION, "privacy"); + // we have to create a new instance of MiniYARNCluster to avoid SASL qop + // mismatches between client and server + tearDown(); + createClientAndCluster(conf); + startApp(); + registerAndAllocate(); + + // recreate the original MiniYARNCluster and YarnClient for other tests + conf.unset(CommonConfigurationKeysPublic.HADOOP_RPC_PROTECTION); + tearDown(); + createClientAndCluster(conf); + // unless we start an application the cancelApp() method will fail when + // 
it runs after this test + startApp(); + } + + private void registerAndAllocate() throws YarnException, IOException { AMRMClient amClient = null; try { // start am rm client diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java index 1e90a2a..3b2aa6f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java @@ -533,9 +533,9 @@ public void testGetLabelsToNodes() throws YarnException, IOException { client.start(); // Get labels to nodes mapping - Map> expectedLabelsToNodes = + Map> expectedLabelsToNodes = ((MockYarnClient)client).getLabelsToNodesMap(); - Map> labelsToNodes = client.getLabelsToNodes(); + Map> labelsToNodes = client.getLabelsToNodes(); Assert.assertEquals(labelsToNodes, expectedLabelsToNodes); Assert.assertEquals(labelsToNodes.size(), 3); @@ -559,18 +559,12 @@ public void testGetNodesToLabels() throws YarnException, IOException { client.start(); // Get labels to nodes mapping - Map> expectedNodesToLabels = ((MockYarnClient) client) + Map> expectedNodesToLabels = ((MockYarnClient) client) .getNodeToLabelsMap(); - Map> nodesToLabels = client.getNodeToLabels(); + Map> nodesToLabels = client.getNodeToLabels(); Assert.assertEquals(nodesToLabels, expectedNodesToLabels); Assert.assertEquals(nodesToLabels.size(), 1); - // Verify exclusivity - Set labels = nodesToLabels.get(NodeId.newInstance("host", 0)); - for (NodeLabel label : labels) { - Assert.assertFalse(label.isExclusive()); - } - client.stop(); client.close(); } @@ -808,7 +802,7 @@ public void setYarnApplicationState(YarnApplicationState state) { } @Override - public Map> getLabelsToNodes() + public Map> getLabelsToNodes() throws YarnException, IOException { when(mockLabelsToNodesResponse.getLabelsToNodes()).thenReturn( getLabelsToNodesMap()); @@ -816,48 +810,45 @@ public void setYarnApplicationState(YarnApplicationState state) { } @Override - public Map> getLabelsToNodes(Set labels) + public Map> getLabelsToNodes(Set labels) throws YarnException, IOException { when(mockLabelsToNodesResponse.getLabelsToNodes()).thenReturn( getLabelsToNodesMap(labels)); return super.getLabelsToNodes(labels); } - public Map> getLabelsToNodesMap() { - Map> map = new HashMap>(); + public Map> getLabelsToNodesMap() { + Map> map = new HashMap>(); Set setNodeIds = new HashSet(Arrays.asList( NodeId.newInstance("host1", 0), NodeId.newInstance("host2", 0))); - map.put(NodeLabel.newInstance("x"), setNodeIds); - map.put(NodeLabel.newInstance("y"), setNodeIds); - map.put(NodeLabel.newInstance("z"), setNodeIds); + map.put("x", setNodeIds); + map.put("y", setNodeIds); + map.put("z", setNodeIds); return map; } - public Map> getLabelsToNodesMap(Set labels) { - Map> map = new HashMap>(); - Set setNodeIds = - new HashSet(Arrays.asList( + public Map> getLabelsToNodesMap(Set labels) { + Map> map = new HashMap>(); + Set setNodeIds = new HashSet(Arrays.asList( NodeId.newInstance("host1", 0), NodeId.newInstance("host2", 0))); - for(String label : labels) { - map.put(NodeLabel.newInstance(label), setNodeIds); + for (String label : labels) { + map.put(label, setNodeIds); } return map; } @Override - public Map> getNodeToLabels() throws YarnException, + public 
Map> getNodeToLabels() throws YarnException, IOException { when(mockNodeToLabelsResponse.getNodeToLabels()).thenReturn( getNodeToLabelsMap()); return super.getNodeToLabels(); } - public Map> getNodeToLabelsMap() { - Map> map = new HashMap>(); - Set setNodeLabels = new HashSet(Arrays.asList( - NodeLabel.newInstance("x", false), - NodeLabel.newInstance("y", false))); + public Map> getNodeToLabelsMap() { + Map> map = new HashMap>(); + Set setNodeLabels = new HashSet(Arrays.asList("x", "y")); map.put(NodeId.newInstance("host", 0), setNodeLabels); return map; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml index a982ab2..795335f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml @@ -19,12 +19,12 @@ hadoop-yarn org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-yarn-common - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN Common diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetClusterNodeLabelsResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetClusterNodeLabelsResponsePBImpl.java index 227abe9..54d454d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetClusterNodeLabelsResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetClusterNodeLabelsResponsePBImpl.java @@ -72,11 +72,15 @@ private void mergeLocalToBuilder() { private void addNodeLabelsToProto() { maybeInitBuilder(); builder.clearNodeLabels(); + builder.clearDeprecatedNodeLabels(); List protoList = new ArrayList(); + List protoListString = new ArrayList(); for (NodeLabel r : this.updatedNodeLabels) { protoList.add(convertToProtoFormat(r)); + protoListString.add(r.getName()); } builder.addAllNodeLabels(protoList); + builder.addAllDeprecatedNodeLabels(protoListString); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetLabelsToNodesResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetLabelsToNodesResponsePBImpl.java index 418fcbd..961b9f1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetLabelsToNodesResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetLabelsToNodesResponsePBImpl.java @@ -46,7 +46,7 @@ GetLabelsToNodesResponseProto.Builder builder = null; boolean viaProto = false; - private Map> labelsToNodes; + private Map> labelsToNodes; public GetLabelsToNodesResponsePBImpl() { this.builder = GetLabelsToNodesResponseProto.newBuilder(); @@ -63,7 +63,7 @@ private void initLabelsToNodes() { } GetLabelsToNodesResponseProtoOrBuilder p = viaProto ? 
proto : builder; List list = p.getLabelsToNodesList(); - this.labelsToNodes = new HashMap>(); + this.labelsToNodes = new HashMap>(); for (LabelsToNodeIdsProto c : list) { Set setNodes = new HashSet(); @@ -73,7 +73,7 @@ private void initLabelsToNodes() { } if (!setNodes.isEmpty()) { this.labelsToNodes - .put(new NodeLabelPBImpl(c.getNodeLabels()), setNodes); + .put(c.getNodeLabels(), setNodes); } } } @@ -97,7 +97,7 @@ private void addLabelsToNodesToProto() { public Iterator iterator() { return new Iterator() { - Iterator>> iter = + Iterator>> iter = labelsToNodes.entrySet().iterator(); @Override @@ -107,13 +107,13 @@ public void remove() { @Override public LabelsToNodeIdsProto next() { - Entry> now = iter.next(); + Entry> now = iter.next(); Set nodeProtoSet = new HashSet(); for(NodeId n : now.getValue()) { nodeProtoSet.add(convertToProtoFormat(n)); } return LabelsToNodeIdsProto.newBuilder() - .setNodeLabels(convertToProtoFormat(now.getKey())) + .setNodeLabels(now.getKey()) .addAllNodeId(nodeProtoSet) .build(); } @@ -153,10 +153,6 @@ private NodeIdProto convertToProtoFormat(NodeId t) { return ((NodeIdPBImpl)t).getProto(); } - private NodeLabelProto convertToProtoFormat(NodeLabel l) { - return ((NodeLabelPBImpl)l).getProto(); - } - @Override public int hashCode() { assert false : "hashCode not designed"; @@ -176,7 +172,7 @@ public boolean equals(Object other) { @Override @Public @Evolving - public void setLabelsToNodes(Map> map) { + public void setLabelsToNodes(Map> map) { initLabelsToNodes(); labelsToNodes.clear(); labelsToNodes.putAll(map); @@ -185,7 +181,7 @@ public void setLabelsToNodes(Map> map) { @Override @Public @Evolving - public Map> getLabelsToNodes() { + public Map> getLabelsToNodes() { initLabelsToNodes(); return this.labelsToNodes; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNodesToLabelsResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNodesToLabelsResponsePBImpl.java index 52be73f..26a6389 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNodesToLabelsResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/GetNodesToLabelsResponsePBImpl.java @@ -32,11 +32,13 @@ import org.apache.hadoop.yarn.api.records.impl.pb.NodeLabelPBImpl; import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto; import org.apache.hadoop.yarn.api.protocolrecords.GetNodesToLabelsResponse; -import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdToLabelsInfoProto; +import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdToLabelsProto; import org.apache.hadoop.yarn.proto.YarnProtos.NodeLabelProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNodesToLabelsResponseProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos.GetNodesToLabelsResponseProtoOrBuilder; +import com.google.common.collect.Sets; + public class GetNodesToLabelsResponsePBImpl extends GetNodesToLabelsResponse { GetNodesToLabelsResponseProto proto = GetNodesToLabelsResponseProto @@ -44,7 +46,7 @@ GetNodesToLabelsResponseProto.Builder builder = null; boolean viaProto = false; - private Map> nodeToLabels; + private Map> nodeToLabels; public GetNodesToLabelsResponsePBImpl() { this.builder = GetNodesToLabelsResponseProto.newBuilder(); @@ -60,15 +62,12 @@ private void initNodeToLabels() { return; 
} GetNodesToLabelsResponseProtoOrBuilder p = viaProto ? proto : builder; - List list = p.getNodeToLabelsList(); - this.nodeToLabels = new HashMap>(); - - for (NodeIdToLabelsInfoProto c : list) { - Set labels = new HashSet(); - for (NodeLabelProto l : c.getNodeLabelsList()) { - labels.add(new NodeLabelPBImpl(l)); - } - this.nodeToLabels.put(new NodeIdPBImpl(c.getNodeId()), labels); + List list = p.getNodeToLabelsList(); + this.nodeToLabels = new HashMap>(); + + for (NodeIdToLabelsProto c : list) { + this.nodeToLabels.put(new NodeIdPBImpl(c.getNodeId()), + Sets.newHashSet(c.getNodeLabelsList())); } } @@ -85,13 +84,13 @@ private void addNodeToLabelsToProto() { if (nodeToLabels == null) { return; } - Iterable iterable = - new Iterable() { + Iterable iterable = + new Iterable() { @Override - public Iterator iterator() { - return new Iterator() { + public Iterator iterator() { + return new Iterator() { - Iterator>> iter = nodeToLabels + Iterator>> iter = nodeToLabels .entrySet().iterator(); @Override @@ -100,16 +99,11 @@ public void remove() { } @Override - public NodeIdToLabelsInfoProto next() { - Entry> now = iter.next(); - Set labelProtoList = - new HashSet(); - for (NodeLabel l : now.getValue()) { - labelProtoList.add(convertToProtoFormat(l)); - } - return NodeIdToLabelsInfoProto.newBuilder() + public NodeIdToLabelsProto next() { + Entry> now = iter.next(); + return NodeIdToLabelsProto.newBuilder() .setNodeId(convertToProtoFormat(now.getKey())) - .addAllNodeLabels(labelProtoList).build(); + .addAllNodeLabels(now.getValue()).build(); } @Override @@ -144,13 +138,13 @@ public GetNodesToLabelsResponseProto getProto() { } @Override - public Map> getNodeToLabels() { + public Map> getNodeToLabels() { initNodeToLabels(); return this.nodeToLabels; } @Override - public void setNodeToLabels(Map> map) { + public void setNodeToLabels(Map> map) { initNodeToLabels(); nodeToLabels.clear(); nodeToLabels.putAll(map); @@ -159,10 +153,6 @@ public void setNodeToLabels(Map> map) { private NodeIdProto convertToProtoFormat(NodeId t) { return ((NodeIdPBImpl)t).getProto(); } - - private NodeLabelProto convertToProtoFormat(NodeLabel t) { - return ((NodeLabelPBImpl)t).getProto(); - } @Override public int hashCode() { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/AddToClusterNodeLabelsRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/AddToClusterNodeLabelsRequestPBImpl.java index 1ff0bef..0f1e400 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/AddToClusterNodeLabelsRequestPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/AddToClusterNodeLabelsRequestPBImpl.java @@ -19,7 +19,6 @@ package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb; import java.util.ArrayList; -import java.util.Collection; import java.util.List; import org.apache.hadoop.yarn.api.records.NodeLabel; @@ -30,7 +29,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.AddToClusterNodeLabelsRequest; public class AddToClusterNodeLabelsRequestPBImpl extends - AddToClusterNodeLabelsRequest { + AddToClusterNodeLabelsRequest { AddToClusterNodeLabelsRequestProto proto = AddToClusterNodeLabelsRequestProto .getDefaultInstance(); AddToClusterNodeLabelsRequestProto.Builder builder = null; @@ 
-71,11 +70,15 @@ private void mergeLocalToBuilder() { private void addNodeLabelsToProto() { maybeInitBuilder(); builder.clearNodeLabels(); + builder.clearDeprecatedNodeLabels(); List protoList = new ArrayList(); + List protoListString = new ArrayList(); for (NodeLabel r : this.updatedNodeLabels) { protoList.add(convertToProtoFormat(r)); + protoListString.add(r.getName()); } builder.addAllNodeLabels(protoList); + builder.addAllDeprecatedNodeLabels(protoListString); } @Override @@ -101,17 +104,6 @@ private void maybeInitBuilder() { viaProto = false; } - @Override - public void setNodeLabels(List updatedNodeLabels) { - maybeInitBuilder(); - this.updatedNodeLabels = new ArrayList<>(); - if (updatedNodeLabels == null) { - builder.clearNodeLabels(); - return; - } - this.updatedNodeLabels.addAll(updatedNodeLabels); - } - private void initLocalNodeLabels() { AddToClusterNodeLabelsRequestProtoOrBuilder p = viaProto ? proto : builder; List attributesProtoList = p.getNodeLabelsList(); @@ -121,15 +113,6 @@ private void initLocalNodeLabels() { } } - @Override - public List getNodeLabels() { - if (this.updatedNodeLabels != null) { - return this.updatedNodeLabels; - } - initLocalNodeLabels(); - return this.updatedNodeLabels; - } - private NodeLabel convertFromProtoFormat(NodeLabelProto p) { return new NodeLabelPBImpl(p); } @@ -142,4 +125,24 @@ private NodeLabelProto convertToProtoFormat(NodeLabel t) { public String toString() { return getProto().toString(); } + + @Override + public void setNodeLabels(List nodeLabels) { + maybeInitBuilder(); + this.updatedNodeLabels = new ArrayList<>(); + if (nodeLabels == null) { + builder.clearNodeLabels(); + return; + } + this.updatedNodeLabels.addAll(nodeLabels); + } + + @Override + public List getNodeLabels() { + if (this.updatedNodeLabels != null) { + return this.updatedNodeLabels; + } + initLocalNodeLabels(); + return this.updatedNodeLabels; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/ReplaceLabelsOnNodeRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/ReplaceLabelsOnNodeRequestPBImpl.java index 3b15b27..f6634b4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/ReplaceLabelsOnNodeRequestPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/ReplaceLabelsOnNodeRequestPBImpl.java @@ -28,7 +28,7 @@ import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.impl.pb.NodeIdPBImpl; import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto; -import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.NodeIdToLabelsNameProto; +import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdToLabelsProto; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ReplaceLabelsOnNodeRequestProto; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.ReplaceLabelsOnNodeRequestProtoOrBuilder; import org.apache.hadoop.yarn.server.api.protocolrecords.ReplaceLabelsOnNodeRequest; @@ -58,10 +58,10 @@ private void initNodeToLabels() { return; } ReplaceLabelsOnNodeRequestProtoOrBuilder p = viaProto ? 
proto : builder; - List list = p.getNodeToLabelsList(); + List list = p.getNodeToLabelsList(); this.nodeIdToLabels = new HashMap>(); - for (NodeIdToLabelsNameProto c : list) { + for (NodeIdToLabelsProto c : list) { this.nodeIdToLabels.put(new NodeIdPBImpl(c.getNodeId()), Sets.newHashSet(c.getNodeLabelsList())); } @@ -80,11 +80,11 @@ private void addNodeToLabelsToProto() { if (nodeIdToLabels == null) { return; } - Iterable iterable = - new Iterable() { + Iterable iterable = + new Iterable() { @Override - public Iterator iterator() { - return new Iterator() { + public Iterator iterator() { + return new Iterator() { Iterator>> iter = nodeIdToLabels .entrySet().iterator(); @@ -95,9 +95,9 @@ public void remove() { } @Override - public NodeIdToLabelsNameProto next() { + public NodeIdToLabelsProto next() { Entry> now = iter.next(); - return NodeIdToLabelsNameProto.newBuilder() + return NodeIdToLabelsProto.newBuilder() .setNodeId(convertToProtoFormat(now.getKey())).clearNodeLabels() .addAllNodeLabels(now.getValue()).build(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/util/WebAppUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/util/WebAppUtils.java index 03588f1..f45465a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/util/WebAppUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/util/WebAppUtils.java @@ -87,21 +87,46 @@ public static void setNMWebAppHostNameAndPort(Configuration conf, hostName + ":" + port); } } - - public static String getRMWebAppURLWithScheme(Configuration conf) { - return getHttpSchemePrefix(conf) + getRMWebAppURLWithoutScheme(conf); - } - - public static String getRMWebAppURLWithoutScheme(Configuration conf) { - if (YarnConfiguration.useHttps(conf)) { - return conf.get(YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS, + + public static String getRMWebAppURLWithoutScheme(Configuration conf, + boolean isHAEnabled) { + YarnConfiguration yarnConfig = new YarnConfiguration(conf); + // set RM_ID if we have not configure it. 
+ if (isHAEnabled) { + String rmId = yarnConfig.get(YarnConfiguration.RM_HA_ID); + if (rmId == null || rmId.isEmpty()) { + List rmIds = new ArrayList<>(HAUtil.getRMHAIds(conf)); + if (rmIds != null && !rmIds.isEmpty()) { + yarnConfig.set(YarnConfiguration.RM_HA_ID, rmIds.get(0)); + } + } + } + if (YarnConfiguration.useHttps(yarnConfig)) { + if (isHAEnabled) { + return HAUtil.getConfValueForRMInstance( + YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS, yarnConfig); + } + return yarnConfig.get(YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS, YarnConfiguration.DEFAULT_RM_WEBAPP_HTTPS_ADDRESS); }else { - return conf.get(YarnConfiguration.RM_WEBAPP_ADDRESS, + if (isHAEnabled) { + return HAUtil.getConfValueForRMInstance( + YarnConfiguration.RM_WEBAPP_ADDRESS, yarnConfig); + } + return yarnConfig.get(YarnConfiguration.RM_WEBAPP_ADDRESS, YarnConfiguration.DEFAULT_RM_WEBAPP_ADDRESS); } } + public static String getRMWebAppURLWithScheme(Configuration conf) { + return getHttpSchemePrefix(conf) + getRMWebAppURLWithoutScheme( + conf, HAUtil.isHAEnabled(conf)); + } + + public static String getRMWebAppURLWithoutScheme(Configuration conf) { + return getRMWebAppURLWithoutScheme(conf, false); + } + public static List getProxyHostsAndPortsForAmFilter( Configuration conf) { List addrs = new ArrayList(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java index 2e76865..7389423 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfiguration.java @@ -41,6 +41,22 @@ public void testDefaultRMWebUrl() throws Exception { // specifically add slashes and Jetty doesn't handle double slashes. 
Assert.assertNotSame("RM Web Url is not correct", "http://0.0.0.0:8088", rmWebUrl); + + // test it in HA scenario + conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true); + conf.set(YarnConfiguration.RM_HA_IDS, "rm1, rm2"); + conf.set("yarn.resourcemanager.webapp.address.rm1", "10.10.10.10:18088"); + conf.set("yarn.resourcemanager.webapp.address.rm2", "20.20.20.20:28088"); + String rmWebUrlinHA = WebAppUtils.getRMWebAppURLWithScheme(conf); + Assert.assertEquals("http://10.10.10.10:18088", rmWebUrlinHA); + + YarnConfiguration conf2 = new YarnConfiguration(); + conf2.setBoolean(YarnConfiguration.RM_HA_ENABLED, true); + conf2.set(YarnConfiguration.RM_HA_IDS, "rm1, rm2"); + conf2.set("yarn.resourcemanager.hostname.rm1", "30.30.30.30"); + conf2.set("yarn.resourcemanager.hostname.rm2", "40.40.40.40"); + String rmWebUrlinHA2 = WebAppUtils.getRMWebAppURLWithScheme(conf2); + Assert.assertEquals("http://30.30.30.30:8088", rmWebUrlinHA2); } @Test diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml index 815f59c..3289c9b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml @@ -19,11 +19,11 @@ hadoop-yarn org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 hadoop-yarn-registry - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN Registry diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/pom.xml index 64814a5..5b653c3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/pom.xml @@ -22,12 +22,12 @@ hadoop-yarn-server org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-yarn-server-applicationhistoryservice - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN ApplicationHistoryService diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml index 9ccb783..f5e17d7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml @@ -19,12 +19,12 @@ hadoop-yarn-server org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-yarn-server-common - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN Server Common diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml index 1768d5c..9c85c5a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml @@ -19,12 +19,12 @@ hadoop-yarn-server org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-yarn-server-nodemanager - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN NodeManager @@ -280,6 +280,18 @@ + org.apache.rat + apache-rat-plugin + + + src/main/native/container-executor/impl/compat/fstatat.h + src/main/native/container-executor/impl/compat/openat.h + src/main/native/container-executor/impl/compat/unlinkat.h + + + + + 
org.apache.hadoop hadoop-maven-plugins diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt index d4d6ae1..fbc794c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/CMakeLists.txt @@ -19,12 +19,38 @@ cmake_minimum_required(VERSION 2.6 FATAL_ERROR) list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/../../../../../hadoop-common-project/hadoop-common) include(HadoopCommon) +# determine if container-executor.conf.dir is an absolute +# path in case the OS we're compiling on doesn't have +# a hook in get_executable. We'll use this define +# later in the code to potentially throw a compile error +string(REGEX MATCH . HCD_ONE "${HADOOP_CONF_DIR}") +string(COMPARE EQUAL ${HCD_ONE} / HADOOP_CONF_DIR_IS_ABS) + # Note: can't use -D_FILE_OFFSET_BITS=64, see MAPREDUCE-4258 string(REPLACE "-D_FILE_OFFSET_BITS=64" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}") string(REPLACE "-D_FILE_OFFSET_BITS=64" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +include(CheckIncludeFiles) +check_include_files("sys/types.h;sys/sysctl.h" HAVE_SYS_SYSCTL_H) + include(CheckFunctionExists) +check_function_exists(canonicalize_file_name HAVE_CANONICALIZE_FILE_NAME) check_function_exists(fcloseall HAVE_FCLOSEALL) +check_function_exists(fchmodat HAVE_FCHMODAT) +check_function_exists(fdopendir HAVE_FDOPENDIR) +check_function_exists(fstatat HAVE_FSTATAT) +check_function_exists(openat HAVE_OPENAT) +check_function_exists(unlinkat HAVE_UNLINKAT) + +include(CheckSymbolExists) +check_symbol_exists(sysctl "sys/types.h;sys/sysctl.h" HAVE_SYSCTL) + +if(APPLE) + include_directories( /System/Library/Frameworks ) + find_library(COCOA_LIBRARY Cocoa) + mark_as_advanced(COCOA_LIBRARY) + set(EXTRA_LIBS ${COCOA_LIBRARY}) +endif(APPLE) function(output_directory TGT DIR) set_target_properties(${TGT} PROPERTIES @@ -46,6 +72,7 @@ configure_file(${CMAKE_SOURCE_DIR}/config.h.cmake ${CMAKE_BINARY_DIR}/config.h) add_library(container main/native/container-executor/impl/configuration.c main/native/container-executor/impl/container-executor.c + main/native/container-executor/impl/get_executable.c ) add_executable(container-executor @@ -60,6 +87,6 @@ add_executable(test-container-executor main/native/container-executor/test/test-container-executor.c ) target_link_libraries(test-container-executor - container + container ${EXTRA_LIBS} ) output_directory(test-container-executor target/usr/local/bin) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/config.h.cmake b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/config.h.cmake index 0f7a490..d8e710f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/config.h.cmake +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/config.h.cmake @@ -18,8 +18,26 @@ #ifndef CONFIG_H #define CONFIG_H +/* custom configs */ + #cmakedefine HADOOP_CONF_DIR "@HADOOP_CONF_DIR@" -#cmakedefine HAVE_FCLOSEALL "@HAVE_FCLOSEALL@" +#cmakedefine HADOOP_CONF_DIR_IS_ABS "@HADOOP_CONF_DIR_IS_ABS@" + +/* specific functions */ + +#cmakedefine HAVE_CANONICALIZE_FILE_NAME @HAVE_CANONICALIZE_FILE_NAME@ +#cmakedefine HAVE_FCHMODAT @HAVE_FCHMODAT@ +#cmakedefine HAVE_FCLOSEALL 
@HAVE_FCLOSEALL@ +#cmakedefine HAVE_FDOPENDIR @HAVE_FDOPENDIR@ +#cmakedefine HAVE_FSTATAT @HAVE_FSTATAT@ +#cmakedefine HAVE_OPENAT @HAVE_OPENAT@ +#cmakedefine HAVE_SYSCTL @HAVE_SYSCTL@ +#cmakedefine HAVE_UNLINKAT @HAVE_UNLINKAT@ + + +/* specific headers */ + +#cmakedefine HAVE_SYS_SYSCTL_H @HAVE_SYS_SYSCTL_H@ #endif diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/fchmodat.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/fchmodat.h new file mode 100644 index 0000000..7812b5d --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/fchmodat.h @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _FCHMODAT_H_ +#define _FCHMODAT_H_ + +#include + +#include + +#define AT_SYMLINK_NOFOLLOW 0x01 + +static int +fchmodat(int fd, const char *path, mode_t mode, int flag) +{ + int cfd, error, ret; + + cfd = open(".", O_RDONLY | O_DIRECTORY); + if (cfd == -1) + return (-1); + + if (fchdir(fd) == -1) { + error = errno; + (void)close(cfd); + errno = error; + return (-1); + } + + if (flag == AT_SYMLINK_NOFOLLOW) + ret = lchmod(path, mode); + else + ret = chmod(path, mode); + + error = errno; + (void)fchdir(cfd); + (void)close(cfd); + errno = error; + return (ret); +} + +#endif /* !_FCHMODAT_H_ */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/fdopendir.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/fdopendir.h new file mode 100644 index 0000000..1f9bdc3 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/fdopendir.h @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _FDOPENDIR_H_ +#define _FDOPENDIR_H_ + +#include +#include +#include + +DIR * +fdopendir(int fd) +{ + int cfd, error; + DIR *dfd; + + cfd = open(".", O_RDONLY | O_DIRECTORY); + if (cfd == -1) + return (NULL); + + if (fchdir(fd) == -1) { + error = errno; + (void)close(cfd); + errno = error; + return (NULL); + } + + dfd=opendir("."); + error = errno; + (void)fchdir(cfd); + (void)close(cfd); + errno = error; + return (dfd); +} + +#endif /* !_FDOPENDIR_H_ */ + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/fstatat.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/fstatat.h new file mode 100644 index 0000000..e1b1658 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/fstatat.h @@ -0,0 +1,67 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _FSTATAT_H_ +#define _FSTATAT_H_ + +#include + +#include + +#define AT_SYMLINK_NOFOLLOW 0x01 + +static int +fstatat(int fd, const char *path, struct stat *buf, int flag) +{ + int cfd, error, ret; + + cfd = open(".", O_RDONLY | O_DIRECTORY); + if (cfd == -1) + return (-1); + + if (fchdir(fd) == -1) { + error = errno; + (void)close(cfd); + errno = error; + return (-1); + } + + if (flag == AT_SYMLINK_NOFOLLOW) + ret = lstat(path, buf); + else + ret = stat(path, buf); + + error = errno; + (void)fchdir(cfd); + (void)close(cfd); + errno = error; + return (ret); +} + +#endif /* !_FSTATAT_H_ */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/openat.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/openat.h new file mode 100644 index 0000000..005be37 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/openat.h @@ -0,0 +1,74 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _OPENAT_H_ +#define _OPENAT_H_ + +#include +#include +#include + +static int +openat(int fd, const char *path, int flags, ...) 
+{ + int cfd, ffd, error; + + cfd = open(".", O_RDONLY | O_DIRECTORY); + if (cfd == -1) + return (-1); + + if (fchdir(fd) == -1) { + error = errno; + (void)close(cfd); + errno = error; + return (-1); + } + + if ((flags & O_CREAT) != 0) { + va_list ap; + int mode; + + va_start(ap, flags); + mode = va_arg(ap, int); + va_end(ap); + + ffd = open(path, flags, mode); + } else { + ffd = open(path, flags); + } + + error = errno; + (void)fchdir(cfd); + (void)close(cfd); + errno = error; + return (ffd); +} + +#endif /* !_OPENAT_H_ */ + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/unlinkat.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/unlinkat.h new file mode 100644 index 0000000..a4977a1 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/unlinkat.h @@ -0,0 +1,67 @@ +/*- + * Copyright (c) 2012 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Pawel Jakub Dawidek under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#ifndef _UNLINKAT_H_ +#define _UNLINKAT_H_ + +#include +#include + +#define AT_REMOVEDIR 0x01 + +static int +unlinkat(int fd, const char *path, int flag) +{ + int cfd, error, ret; + + cfd = open(".", O_RDONLY | O_DIRECTORY); + if (cfd == -1) + return (-1); + + if (fchdir(fd) == -1) { + error = errno; + (void)close(cfd); + errno = error; + return (-1); + } + + if (flag == AT_REMOVEDIR) + ret = rmdir(path); + else + ret = unlink(path); + + error = errno; + (void)fchdir(cfd); + (void)close(cfd); + errno = error; + return (ret); +} + +#endif /* !_UNLINKAT_H_ */ + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c index 3447524..69ceaf6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.c @@ -17,7 +17,7 @@ */ // ensure we get the posix version of dirname by including this first -#include +#include #include "configuration.h" #include "container-executor.h" @@ -68,7 +68,7 @@ static int is_only_root_writable(const char *file) { return 0; } if ((file_stat.st_mode & (S_IWGRP | S_IWOTH)) != 0) { - fprintf(ERRORFILE, + fprintf(ERRORFILE, "File %s must not be world or group writable, but is %03lo\n", file, (unsigned long)file_stat.st_mode & (~S_IFMT)); return 0; @@ -92,8 +92,13 @@ char *resolve_config_path(const char* file_name, const char *root) { real_fname = buffer; } +#ifdef HAVE_CANONICALIZE_FILE_NAME + char * ret = (real_fname == NULL) ? NULL : canonicalize_file_name(real_fname); +#else char * ret = (real_fname == NULL) ? NULL : realpath(real_fname, NULL); +#endif #ifdef DEBUG + fprintf(stderr,"ret = %s\n", ret); fprintf(stderr, "resolve_config_path(file_name=%s,root=%s)=%s\n", file_name, root ? root : "null", ret ? ret : "null"); #endif @@ -102,7 +107,7 @@ char *resolve_config_path(const char* file_name, const char *root) { /** * Ensure that the configuration file and all of the containing directories - * are only writable by root. Otherwise, an attacker can change the + * are only writable by root. Otherwise, an attacker can change the * configuration and potentially cause damage. * returns 0 if permissions are ok */ @@ -155,7 +160,7 @@ void read_config(const char* file_name, struct configuration *cfg) { exit(OUT_OF_MEMORY); } size_read = getline(&line,&linesize,conf_file); - + //feof returns true only after we read past EOF. 
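The compat headers added above (fchmodat.h, fdopendir.h, fstatat.h, openat.h, unlinkat.h) all fall back on the same trick when a platform lacks the corresponding POSIX call: remember the current working directory as a descriptor, fchdir() into the directory descriptor supplied by the caller, issue the ordinary path-based call, then restore the old working directory while preserving errno. The sketch below restates that pattern as a standalone helper; the name unlinkat_compat() is illustrative and not part of the patch, and, like the shims themselves, it temporarily changes the process working directory, so it is not thread-safe.

/* Emulate unlinkat(dirfd, path, 0) on a platform without *at() calls,
 * using the same save-cwd / fchdir / restore trick as the compat headers. */
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

static int unlinkat_compat(int dirfd, const char *path)
{
    int cwd, saved_errno, ret;

    /* keep a handle on the current directory so we can return to it */
    cwd = open(".", O_RDONLY | O_DIRECTORY);
    if (cwd == -1)
        return -1;

    /* move into the caller's directory descriptor */
    if (fchdir(dirfd) == -1) {
        saved_errno = errno;
        (void)close(cwd);
        errno = saved_errno;
        return -1;
    }

    /* perform the ordinary, path-relative operation */
    ret = unlink(path);

    /* restore the previous working directory, preserving unlink()'s errno */
    saved_errno = errno;
    (void)fchdir(cwd);
    (void)close(cwd);
    errno = saved_errno;
    return ret;
}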
//so a file with no new line, at last can reach this place //if size_read returns negative check for eof condition @@ -235,7 +240,7 @@ void read_config(const char* file_name, struct configuration *cfg) { free(line); } - + //close the file fclose(conf_file); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h index 8f87cb2..eced13b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/configuration.h @@ -16,6 +16,10 @@ * limitations under the License. */ +#ifdef __FreeBSD__ +#define _WITH_GETLINE +#endif + #include /** Define a platform-independent constant instead of using PATH_MAX */ @@ -24,7 +28,7 @@ /** * Ensure that the configuration file and all of the containing directories - * are only writable by root. Otherwise, an attacker can change the + * are only writable by root. Otherwise, an attacker can change the * configuration and potentially cause damage. * returns 0 if permissions are ok */ @@ -78,7 +82,7 @@ void free_configurations(struct configuration *cfg); /** * If str is a string of the form key=val, find 'key' - * + * * @param input The input string * @param out Where to put the output string. * @param out_len The length of the output buffer. @@ -91,7 +95,7 @@ int get_kv_key(const char *input, char *out, size_t out_len); /** * If str is a string of the form key=val, find 'val' - * + * * @param input The input string * @param out Where to put the output string. * @param out_len The length of the output buffer. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c index 4ddd6d9..bdbcdfa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c @@ -41,6 +41,28 @@ #include #include +#include "config.h" + +#ifndef HAVE_FCHMODAT +#include "compat/fchmodat.h" +#endif + +#ifndef HAVE_FDOPENDIR +#include "compat/fdopendir.h" +#endif + +#ifndef HAVE_FSTATAT +#include "compat/fstatat.h" +#endif + +#ifndef HAVE_OPENAT +#include "compat/openat.h" +#endif + +#ifndef HAVE_UNLINKAT +#include "compat/unlinkat.h" +#endif + static const int DEFAULT_MIN_USERID = 1000; static const char* DEFAULT_BANNED_USERS[] = {"mapred", "hdfs", "bin", 0}; @@ -88,31 +110,14 @@ char *get_nodemanager_group() { return get_value(NM_GROUP_KEY, &executor_cfg); } -/** - * get the executable filename. 
- */ -char* get_executable() { - char buffer[EXECUTOR_PATH_MAX]; - snprintf(buffer, EXECUTOR_PATH_MAX, "/proc/%" PRId64 "/exe", (int64_t)getpid()); - char *filename = malloc(EXECUTOR_PATH_MAX); - ssize_t len = readlink(buffer, filename, EXECUTOR_PATH_MAX); - if (len == -1) { - fprintf(ERRORFILE, "Can't get executable name from %s - %s\n", buffer, - strerror(errno)); - exit(-1); - } else if (len >= EXECUTOR_PATH_MAX) { - fprintf(ERRORFILE, "Executable name %.*s is longer than %d characters.\n", - EXECUTOR_PATH_MAX, filename, EXECUTOR_PATH_MAX); - exit(-1); - } - filename[len] = '\0'; - return filename; -} - int check_executor_permissions(char *executable_file) { errno = 0; +#ifdef HAVE_CANONICALIZE_FILE_NAME + char * resolved_path = canonicalize_file_name(executable_file); +#else char * resolved_path = realpath(executable_file, NULL); +#endif if (resolved_path == NULL) { fprintf(ERRORFILE, "Error resolving the canonical name for the executable : %s!", @@ -123,7 +128,7 @@ int check_executor_permissions(char *executable_file) { struct stat filestat; errno = 0; if (stat(resolved_path, &filestat) != 0) { - fprintf(ERRORFILE, + fprintf(ERRORFILE, "Could not stat the executable : %s!.\n", strerror(errno)); return -1; } @@ -185,6 +190,7 @@ static int change_effective_user(uid_t user, gid_t group) { return 0; } +#ifdef __linux /** * Write the pid of the current process to the cgroup file. * cgroup_file: Path to cgroup file where pid needs to be written to. @@ -222,6 +228,7 @@ static int write_pid_to_cgroup_as_root(const char* cgroup_file, pid_t pid) { return 0; } +#endif /** * Write the pid of the current process into the pid file. @@ -377,7 +384,7 @@ static int wait_and_write_exit_code(pid_t pid, const char* exit_code_file) { * priviledges. */ int change_user(uid_t user, gid_t group) { - if (user == getuid() && user == geteuid() && + if (user == getuid() && user == geteuid() && group == getgid() && group == getegid()) { return 0; } @@ -389,7 +396,7 @@ int change_user(uid_t user, gid_t group) { return SETUID_OPER_FAILED; } if (setgid(group) != 0) { - fprintf(LOGFILE, "unable to set group to %d - %s\n", group, + fprintf(LOGFILE, "unable to set group to %d - %s\n", group, strerror(errno)); fprintf(LOGFILE, "Real: %d:%d; Effective: %d:%d\n", getuid(), getgid(), geteuid(), getegid()); @@ -408,7 +415,7 @@ int change_user(uid_t user, gid_t group) { /** * Utility function to concatenate argB to argA using the concat_pattern. */ -char *concatenate(char *concat_pattern, char *return_path_name, +char *concatenate(char *concat_pattern, char *return_path_name, int numArgs, ...) { va_list ap; va_start(ap, numArgs); @@ -585,12 +592,12 @@ int check_dir(const char* npath, mode_t st_mode, mode_t desired, int finalCompon * Function to prepare the container directories. * It creates the container work and log directories. 
*/ -static int create_container_directories(const char* user, const char *app_id, +static int create_container_directories(const char* user, const char *app_id, const char *container_id, char* const* local_dir, char* const* log_dir, const char *work_dir) { // create dirs as 0750 const mode_t perms = S_IRWXU | S_IRGRP | S_IXGRP; if (app_id == NULL || container_id == NULL || user == NULL || user_detail == NULL || user_detail->pw_name == NULL) { - fprintf(LOGFILE, + fprintf(LOGFILE, "Either app_id, container_id or the user passed is null.\n"); return -1; } @@ -598,7 +605,7 @@ static int create_container_directories(const char* user, const char *app_id, int result = -1; char* const* local_dir_ptr; for(local_dir_ptr = local_dir; *local_dir_ptr != NULL; ++local_dir_ptr) { - char *container_dir = get_container_work_directory(*local_dir_ptr, user, app_id, + char *container_dir = get_container_work_directory(*local_dir_ptr, user, app_id, container_id); if (container_dir == NULL) { return -1; @@ -713,7 +720,7 @@ struct passwd* check_user(const char *user) { char *end_ptr = NULL; min_uid = strtol(min_uid_str, &end_ptr, 10); if (min_uid_str == end_ptr || *end_ptr != '\0') { - fprintf(LOGFILE, "Illegal value of %s for %s in configuration\n", + fprintf(LOGFILE, "Illegal value of %s for %s in configuration\n", min_uid_str, MIN_USERID_KEY); fflush(LOGFILE); free(min_uid_str); @@ -786,7 +793,23 @@ int set_user(const char *user) { */ static int change_owner(const char* path, uid_t user, gid_t group) { if (geteuid() == user && getegid() == group) { + + /* + * On the BSDs, this is not a guaranteed shortcut + * since group permissions are inherited + */ + +#if defined(__FreeBSD__) || defined(__NetBSD__) + if (chown(path, user, group) != 0) { + fprintf(LOGFILE, "Can't chown %s to %d:%d - %s\n", path, user, group, + strerror(errno)); + return -1; + } return 0; +#else + return 0; +#endif + } else { uid_t old_user = geteuid(); gid_t old_group = getegid(); @@ -824,14 +847,14 @@ int create_directory_for_user(const char* path) { if (ret == 0) { if (0 == mkdir(path, permissions) || EEXIST == errno) { // need to reassert the group sticky bit - if (chmod(path, permissions) != 0) { - fprintf(LOGFILE, "Can't chmod %s to add the sticky bit - %s\n", - path, strerror(errno)); - ret = -1; - } else if (change_owner(path, user, nm_gid) != 0) { + if (change_owner(path, user, nm_gid) != 0) { fprintf(LOGFILE, "Failed to chown %s to %d:%d: %s\n", path, user, nm_gid, strerror(errno)); ret = -1; + } else if (chmod(path, permissions) != 0) { + fprintf(LOGFILE, "Can't chmod %s to add the sticky bit - %s\n", + path, strerror(errno)); + ret = -1; } } else { fprintf(LOGFILE, "Failed to create directory %s - %s\n", path, @@ -841,7 +864,7 @@ int create_directory_for_user(const char* path) { } if (change_effective_user(user, group) != 0) { fprintf(LOGFILE, "Failed to change user to %i - %i\n", user, group); - + ret = -1; } return ret; @@ -874,14 +897,14 @@ static int open_file_as_nm(const char* filename) { * The input stream is closed. * Return 0 if everything is ok. 
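One behavioural detail worth noting in the create_directory_for_user() hunk above: the patch swaps the order of the chmod() and chown() calls so that ownership is changed first and the mode (including the group set-id bit) is re-applied afterwards. On some systems a chown() can clear set-id bits, so applying the mode last is the ordering that reliably keeps them. Below is a minimal sketch of that ordering only; the helper name and permission mask are illustrative, not the executor's exact values.

/* Illustrative ordering only: chown before chmod so the setgid bit in
 * `perms` is not lost if the ownership change clears set-id bits. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

static int make_user_dir(const char *path, uid_t user, gid_t group)
{
    const mode_t perms = S_IRWXU | S_IRGRP | S_IXGRP | S_ISGID;

    if (mkdir(path, perms) != 0 && errno != EEXIST) {
        fprintf(stderr, "mkdir %s failed: %s\n", path, strerror(errno));
        return -1;
    }
    /* hand the directory over first ... */
    if (chown(path, user, group) != 0) {
        fprintf(stderr, "chown %s failed: %s\n", path, strerror(errno));
        return -1;
    }
    /* ... then re-assert the mode so the group set-id bit survives */
    if (chmod(path, perms) != 0) {
        fprintf(stderr, "chmod %s failed: %s\n", path, strerror(errno));
        return -1;
    }
    return 0;
}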
*/ -static int copy_file(int input, const char* in_filename, +static int copy_file(int input, const char* in_filename, const char* out_filename, mode_t perm) { const int buffer_size = 128*1024; char buffer[buffer_size]; int out_fd = open(out_filename, O_WRONLY|O_CREAT|O_EXCL|O_NOFOLLOW, perm); if (out_fd == -1) { - fprintf(LOGFILE, "Can't open %s for output - %s\n", out_filename, + fprintf(LOGFILE, "Can't open %s for output - %s\n", out_filename, strerror(errno)); fflush(LOGFILE); return -1; @@ -903,13 +926,13 @@ static int copy_file(int input, const char* in_filename, len = read(input, buffer, buffer_size); } if (len < 0) { - fprintf(LOGFILE, "Failed to read file %s - %s\n", in_filename, + fprintf(LOGFILE, "Failed to read file %s - %s\n", in_filename, strerror(errno)); close(out_fd); return -1; } if (close(out_fd) != 0) { - fprintf(LOGFILE, "Failed to close file %s - %s\n", out_filename, + fprintf(LOGFILE, "Failed to close file %s - %s\n", out_filename, strerror(errno)); return -1; } @@ -1351,20 +1374,22 @@ int launch_docker_container_as_user(const char * user, const char *app_id, } if (pid != 0) { +#ifdef __linux fprintf(LOGFILE, "Writing to cgroup task files...\n"); // cgroups-based resource enforcement if (resources_key != NULL && ! strcmp(resources_key, "cgroups")) { - // write pid to cgroups - char* const* cgroup_ptr; - for (cgroup_ptr = resources_values; cgroup_ptr != NULL && + // write pid to cgroups + char* const* cgroup_ptr; + for (cgroup_ptr = resources_values; cgroup_ptr != NULL && *cgroup_ptr != NULL; ++cgroup_ptr) { - if (strcmp(*cgroup_ptr, "none") != 0 && + if (strcmp(*cgroup_ptr, "none") != 0 && write_pid_to_cgroup_as_root(*cgroup_ptr, pid) != 0) { - exit_code = WRITE_CGROUP_FAILED; - goto cleanup; - } - } + exit_code = WRITE_CGROUP_FAILED; + goto cleanup; + } + } } +#endif // write pid to pidfile fprintf(LOGFILE, "Writing pid file...\n"); @@ -1494,7 +1519,7 @@ int launch_container_as_user(const char *user, const char *app_id, goto cleanup; } - // setsid + // setsid pid_t pid = setsid(); if (pid == -1) { exit_code = SETSID_OPER_FAILED; @@ -1509,12 +1534,13 @@ int launch_container_as_user(const char *user, const char *app_id, goto cleanup; } +#ifdef __linux fprintf(LOGFILE, "Writing to cgroup task files...\n"); // cgroups-based resource enforcement if (resources_key != NULL && ! strcmp(resources_key, "cgroups")) { // write pid to cgroups char* const* cgroup_ptr; - for (cgroup_ptr = resources_values; cgroup_ptr != NULL && + for (cgroup_ptr = resources_values; cgroup_ptr != NULL && *cgroup_ptr != NULL; ++cgroup_ptr) { if (strcmp(*cgroup_ptr, "none") != 0 && write_pid_to_cgroup_as_root(*cgroup_ptr, pid) != 0) { @@ -1523,6 +1549,7 @@ int launch_container_as_user(const char *user, const char *app_id, } } } +#endif fprintf(LOGFILE, "Creating local dirs...\n"); exit_code = create_local_dirs(user, app_id, container_id, @@ -1582,11 +1609,8 @@ int signal_container_as_user(const char *user, int pid, int sig) { if (kill(-pid, sig) < 0) { if(errno != ESRCH) { - fprintf(LOGFILE, - "Error signalling process group %d with signal %d - %s\n", - -pid, sig, strerror(errno)); - fprintf(stderr, - "Error signalling process group %d with signal %d - %s\n", + fprintf(LOGFILE, + "Error signalling process group %d with signal %d - %s\n", -pid, sig, strerror(errno)); fflush(LOGFILE); return UNABLE_TO_SIGNAL_CONTAINER; @@ -1694,7 +1718,7 @@ static int nftw_cb(const char *path, * full_path : the path to delete * needs_tt_user: the top level directory must be deleted by the tt user. 
*/ -static int delete_path(const char *full_path, +static int delete_path(const char *full_path, int needs_tt_user) { /* Return an error if the path is null. */ @@ -1813,7 +1837,7 @@ void chown_dir_contents(const char *dir_path, uid_t uid, gid_t gid) { char *buf = stpncpy(path_tmp, dir_path, strlen(dir_path)); *buf++ = '/'; - + dp = opendir(dir_path); if (dp != NULL) { while ((ep = readdir(dp)) != NULL) { @@ -1847,7 +1871,7 @@ int mount_cgroup(const char *pair, const char *hierarchy) { get_kv_value(pair, mount_path, strlen(pair)) < 0) { fprintf(LOGFILE, "Failed to mount cgroup controller; invalid option: %s\n", pair); - result = -1; + result = -1; } else { if (mount("none", mount_path, "cgroup", 0, controller) == 0) { char *buf = stpncpy(hier_path, mount_path, strlen(mount_path)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h index abf3c41..1c64c22 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.h @@ -15,6 +15,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + +/* FreeBSD protects the getline() prototype. See getline(3) for more */ +#ifdef __FreeBSD__ +#define _WITH_GETLINE +#endif + #include #include #include @@ -99,7 +105,7 @@ extern FILE *LOGFILE; extern FILE *ERRORFILE; // get the executable's filename -char* get_executable(); +char* get_executable(char *argv0); //function used to load the configurations present in the secure config void read_executor_config(const char* file_name); @@ -180,7 +186,7 @@ int signal_container_as_user(const char *user, int pid, int sig); // delete a directory (or file) recursively as the user. The directory // could optionally be relative to the baseDir set of directories (if the same // directory appears on multiple disk volumes, the disk volumes should be passed -// as the baseDirs). If baseDirs is not specified, then dir_to_be_deleted is +// as the baseDirs). If baseDirs is not specified, then dir_to_be_deleted is // assumed as the absolute path int delete_as_user(const char *user, const char *dir_to_be_deleted, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/get_executable.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/get_executable.c new file mode 100644 index 0000000..49ae093 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/get_executable.c @@ -0,0 +1,215 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This code implements OS-specific ways to get the absolute + * filename of the executable. Typically, one would use + * realpath(argv[0]) (or equivalent), however, because this + * code runs as setuid and will be used later on to determine + * relative paths, we want something a big more secure + * since argv[0] is replaceable by malicious code. + * + * NOTE! The value returned will be free()'d later on! + * + */ + +#include "config.h" +#include "configuration.h" +#include "container-executor.h" + +#include +#include +#include +#include +#include + +#ifdef HAVE_SYS_SYSCTL_H +#include +#include +#include +#endif + +/* + * A generic function to read a link and return + * the value for use with System V procfs. + * With much thanks to Tom Killian, Roger Faulkner, + * and Ron Gomes, this is pretty generic code. + */ + +char *__get_exec_readproc(char *procfn) { + char *filename; + ssize_t len; + + filename = malloc(EXECUTOR_PATH_MAX); + if (!filename) { + fprintf(ERRORFILE,"cannot allocate memory for filename before readlink: %s\n",strerror(errno)); + exit(-1); + } + len = readlink(procfn, filename, EXECUTOR_PATH_MAX); + if (len == -1) { + fprintf(ERRORFILE,"Can't get executable name from %s - %s\n", procfn, + strerror(errno)); + exit(-1); + } else if (len >= EXECUTOR_PATH_MAX) { + fprintf(ERRORFILE,"Resolved path for %s [%s] is longer than %d characters.\n", + procfn, filename, EXECUTOR_PATH_MAX); + exit(-1); + } + filename[len] = '\0'; + return filename; +} + + +#ifdef HAVE_SYSCTL +/* + * A generic function to ask the kernel via sysctl. + * This is used by most of the open source BSDs, as + * many do not reliably have a /proc mounted. + */ + +char *__get_exec_sysctl(int *mib) +{ + char buffer[EXECUTOR_PATH_MAX]; + char *filename; + size_t len; + + len = sizeof(buffer); + if (sysctl(mib, 4, buffer, &len, NULL, 0) == -1) { + fprintf(ERRORFILE,"Can't get executable name from kernel: %s\n", + strerror(errno)); + exit(-1); + } + filename=malloc(EXECUTOR_PATH_MAX); + if (!filename) { + fprintf(ERRORFILE,"cannot allocate memory for filename after sysctl: %s\n",strerror(errno)); + exit(-1); + } + snprintf(filename,EXECUTOR_PATH_MAX,"%s",buffer); + return filename; +} + +#endif /* HAVE_SYSCTL */ + +#ifdef __APPLE__ + +/* + * Mac OS X doesn't have a procfs, but there is + * libproc which we can use instead. It is available + * in most modern versions of OS X as of this writing (2016). 
+ */ + +#include + +char* get_executable(char *argv0) { + char *filename; + pid_t pid; + + filename = malloc(PROC_PIDPATHINFO_MAXSIZE); + if (!filename) { + fprintf(ERRORFILE,"cannot allocate memory for filename before proc_pidpath: %s\n",strerror(errno)); + exit(-1); + } + pid = getpid(); + if (proc_pidpath(pid,filename,PROC_PIDPATHINFO_MAXSIZE) <= 0) { + fprintf(ERRORFILE,"Can't get executable name from pid %u - %s\n", pid, + strerror(errno)); + exit(-1); + } + return filename; +} + +#elif defined(__FreeBSD__) + +char* get_executable(char *argv0) { + static int mib[] = { + CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1 + }; + return __get_exec_sysctl(mib); +} + +#elif defined(__linux) + + +char* get_executable(char *argv0) { + return __get_exec_readproc("/proc/self/exe"); +} + +#elif defined(__NetBSD__) && defined(KERN_PROC_PATHNAME) + +/* Only really new NetBSD kernels have KERN_PROC_PATHNAME */ + +char* get_executable(char *argv0) { + static int mib[] = { + CTL_KERN, KERN_PROC_ARGS, -1, KERN_PROC_PATHNAME, + }; + return __get_exec_sysctl(mib); +} + +#elif defined(__sun) + +/* + * It's tempting to use getexecname(), but there is no guarantee + * we will get a full path and worse, we'd be reliant on getcwd() + * being where our exec is at. Instead, we'll use the /proc + * method, using the "invisible" /proc/self link that only the + * process itself can see. (Anyone that tells you /proc/self + * doesn't exist on Solaris hasn't read the proc(4) man page.) + */ + +char* get_executable(char *argv0) { + return __get_exec_readproc("/proc/self/path/a.out"); +} + +#elif defined(HADOOP_CONF_DIR_IS_ABS) + +/* + * This is the fallback for operating systems where + * we don't know how to ask the kernel where the executable + * is located. It is only used if the maven property + * container-executor.conf.dir is set to an absolute path + * for security reasons. + */ + +char* get_executable (char *argv0) { + char *filename; + +#ifdef HAVE_CANONICALIZE_FILE_NAME + filename=canonicalize_file_name(argv0); +#else + filename=realpath(argv0,NULL); +#endif + + if (!filename) { + fprintf(ERRORFILE,"realpath of executable: %s\n",strerror(errno)); + exit(-1); + } + return filename; +} + +#else + +/* + * If we ended up here, we're on an operating system that doesn't + * match any of the above. This means either the OS needs to get a + * code added or the container-executor.conf.dir maven property + * should be set to an absolute path. + */ + +#error Cannot safely determine executable path with a relative HADOOP_CONF_DIR on this operating system. + +#endif /* platform checks */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c index 5961e08..62ae3d8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/main.c @@ -129,7 +129,7 @@ static void flush_and_close_log_files() { fclose(LOGFILE); LOGFILE = NULL; } - + if (ERRORFILE != NULL) { fflush(ERRORFILE); fclose(ERRORFILE); @@ -142,19 +142,27 @@ in case of validation failures. 
Also sets up configuration / group information e This function is to be called in every invocation of container-executor, irrespective of whether an explicit checksetup operation is requested. */ -static void assert_valid_setup(char *current_executable) { - char *executable_file = get_executable(); +static void assert_valid_setup(char *argv0) { + int ret; + char *executable_file = get_executable(argv0); + if (!executable_file) { + fprintf(ERRORFILE,"realpath of executable: %s\n",strerror(errno)); + flush_and_close_log_files(); + exit(-1); + } char *orig_conf_file = HADOOP_CONF_DIR "/" CONF_FILENAME; - char *conf_file = resolve_config_path(orig_conf_file, current_executable); + char *conf_file = resolve_config_path(orig_conf_file, executable_file); if (conf_file == NULL) { + free(executable_file); fprintf(ERRORFILE, "Configuration file %s not found.\n", orig_conf_file); flush_and_close_log_files(); exit(INVALID_CONFIG_FILE); } if (check_configuration_permissions(conf_file) != 0) { + free(executable_file); flush_and_close_log_files(); exit(INVALID_CONFIG_FILE); } @@ -164,28 +172,42 @@ static void assert_valid_setup(char *current_executable) { // look up the node manager group in the config file char *nm_group = get_nodemanager_group(); if (nm_group == NULL) { + free(executable_file); fprintf(ERRORFILE, "Can't get configured value for %s.\n", NM_GROUP_KEY); flush_and_close_log_files(); exit(INVALID_CONFIG_FILE); } struct group *group_info = getgrnam(nm_group); if (group_info == NULL) { + free(executable_file); fprintf(ERRORFILE, "Can't get group information for %s - %s.\n", nm_group, strerror(errno)); flush_and_close_log_files(); exit(INVALID_CONFIG_FILE); } set_nm_uid(getuid(), group_info->gr_gid); - // if we are running from a setuid executable, make the real uid root - setuid(0); - // set the real and effective group id to the node manager group - setgid(group_info->gr_gid); + /* + * if we are running from a setuid executable, make the real uid root + * we're going to ignore this result just in case we aren't. + */ + ret=setuid(0); + + /* + * set the real and effective group id to the node manager group + * we're going to ignore this result just in case we aren't + */ + ret=setgid(group_info->gr_gid); + + /* make the unused var warning to away */ + ret++; if (check_executor_permissions(executable_file) != 0) { + free(executable_file); fprintf(ERRORFILE, "Invalid permissions on container-executor binary.\n"); flush_and_close_log_files(); exit(INVALID_CONTAINER_EXEC_PERMISSIONS); } + free(executable_file); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c index 2a76a58e..22941b1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/test/test-container-executor.c @@ -29,7 +29,19 @@ #include #include -#define TEST_ROOT "/tmp/test-container-executor" +#ifdef __APPLE__ +#include +#include + +#define TMPDIR "/private/tmp" +#define RELTMPDIR "../.." +#else +#define RELTMPDIR ".." 
+#define TMPDIR "/tmp" +#endif + +#define TEST_ROOT TMPDIR "/test-container-executor" + #define DONT_TOUCH_FILE "dont-touch-me" #define NM_LOCAL_DIRS TEST_ROOT "/local-1%" TEST_ROOT "/local-2%" \ TEST_ROOT "/local-3%" TEST_ROOT "/local-4%" TEST_ROOT "/local-5" @@ -155,8 +167,8 @@ void check_pid_file(const char* pid_file, pid_t mypid) { } void test_get_user_directory() { - char *user_dir = get_user_directory("/tmp", "user"); - char *expected = "/tmp/usercache/user"; + char *user_dir = get_user_directory(TMPDIR, "user"); + char *expected = TMPDIR "/usercache/user"; if (strcmp(user_dir, expected) != 0) { printf("test_get_user_directory expected %s got %s\n", expected, user_dir); exit(1); @@ -165,8 +177,8 @@ void test_get_user_directory() { } void test_get_app_directory() { - char *expected = "/tmp/usercache/user/appcache/app_200906101234_0001"; - char *app_dir = (char *) get_app_directory("/tmp", "user", + char *expected = TMPDIR "/usercache/user/appcache/app_200906101234_0001"; + char *app_dir = (char *) get_app_directory(TMPDIR, "user", "app_200906101234_0001"); if (strcmp(app_dir, expected) != 0) { printf("test_get_app_directory expected %s got %s\n", expected, app_dir); @@ -176,9 +188,9 @@ void test_get_app_directory() { } void test_get_container_directory() { - char *container_dir = get_container_work_directory("/tmp", "owen", "app_1", + char *container_dir = get_container_work_directory(TMPDIR, "owen", "app_1", "container_1"); - char *expected = "/tmp/usercache/owen/appcache/app_1/container_1"; + char *expected = TMPDIR"/usercache/owen/appcache/app_1/container_1"; if (strcmp(container_dir, expected) != 0) { printf("Fail get_container_work_directory got %s expected %s\n", container_dir, expected); @@ -188,9 +200,9 @@ void test_get_container_directory() { } void test_get_container_launcher_file() { - char *expected_file = ("/tmp/usercache/user/appcache/app_200906101234_0001" + char *expected_file = (TMPDIR"/usercache/user/appcache/app_200906101234_0001" "/launch_container.sh"); - char *app_dir = get_app_directory("/tmp", "user", + char *app_dir = get_app_directory(TMPDIR, "user", "app_200906101234_0001"); char *container_file = get_container_launcher_file(app_dir); if (strcmp(container_file, expected_file) != 0) { @@ -240,8 +252,9 @@ void test_resolve_config_path() { printf("FAIL: failed to resolve config_name on an absolute path name: /bin/ls\n"); exit(1); } - if (strcmp(resolve_config_path("../bin/ls", "/bin/ls"), "/bin/ls") != 0) { - printf("FAIL: failed to resolve config_name on a relative path name: ../bin/ls (relative to /bin/ls)"); + if (strcmp(resolve_config_path(RELTMPDIR TEST_ROOT, TEST_ROOT), TEST_ROOT) != 0) { + printf("FAIL: failed to resolve config_name on a relative path name: " + RELTMPDIR TEST_ROOT " (relative to " TEST_ROOT ")"); exit(1); } } @@ -264,9 +277,9 @@ void test_delete_container() { exit(1); } char* app_dir = get_app_directory(TEST_ROOT "/local-2", yarn_username, "app_1"); - char* dont_touch = get_app_directory(TEST_ROOT "/local-2", yarn_username, + char* dont_touch = get_app_directory(TEST_ROOT "/local-2", yarn_username, DONT_TOUCH_FILE); - char* container_dir = get_container_work_directory(TEST_ROOT "/local-2", + char* container_dir = get_container_work_directory(TEST_ROOT "/local-2", yarn_username, "app_1", "container_1"); char buffer[100000]; sprintf(buffer, "mkdir -p %s/who/let/the/dogs/out/who/who", container_dir); @@ -326,9 +339,9 @@ void test_delete_container() { void test_delete_app() { char* app_dir = get_app_directory(TEST_ROOT "/local-2", 
yarn_username, "app_2"); - char* dont_touch = get_app_directory(TEST_ROOT "/local-2", yarn_username, + char* dont_touch = get_app_directory(TEST_ROOT "/local-2", yarn_username, DONT_TOUCH_FILE); - char* container_dir = get_container_work_directory(TEST_ROOT "/local-2", + char* container_dir = get_container_work_directory(TEST_ROOT "/local-2", yarn_username, "app_2", "container_1"); char buffer[100000]; sprintf(buffer, "mkdir -p %s/who/let/the/dogs/out/who/who", container_dir); @@ -486,7 +499,7 @@ void test_signal_container_group() { exit(0); } printf("Child container launched as %" PRId64 "\n", (int64_t)child); - // there's a race condition for child calling change_user and us + // there's a race condition for child calling change_user and us // calling signal_container_as_user, hence sleeping sleep(3); if (signal_container_as_user(yarn_username, child, SIGKILL) != 0) { @@ -502,7 +515,7 @@ void test_signal_container_group() { exit(1); } if (WTERMSIG(status) != SIGKILL) { - printf("FAIL: child was killed with %d instead of %d\n", + printf("FAIL: child was killed with %d instead of %d\n", WTERMSIG(status), SIGKILL); exit(1); } @@ -548,7 +561,7 @@ void test_init_app() { fflush(stderr); pid_t child = fork(); if (child == -1) { - printf("FAIL: failed to fork process for init_app - %s\n", + printf("FAIL: failed to fork process for init_app - %s\n", strerror(errno)); exit(1); } else if (child == 0) { @@ -630,7 +643,7 @@ void test_run_container() { printf("FAIL: failed to seteuid back to user - %s\n", strerror(errno)); exit(1); } - if (fprintf(script, "#!/bin/bash\n" + if (fprintf(script, "#!/usr/bin/env bash\n" "touch foobar\n" "exit 0") < 0) { printf("FAIL: fprintf failed - %s\n", strerror(errno)); @@ -642,17 +655,17 @@ void test_run_container() { } fflush(stdout); fflush(stderr); - char* container_dir = get_container_work_directory(TEST_ROOT "/local-1", + char* container_dir = get_container_work_directory(TEST_ROOT "/local-1", yarn_username, "app_4", "container_1"); const char * pid_file = TEST_ROOT "/pid.txt"; pid_t child = fork(); if (child == -1) { - printf("FAIL: failed to fork process for init_app - %s\n", + printf("FAIL: failed to fork process for init_app - %s\n", strerror(errno)); exit(1); } else if (child == 0) { - if (launch_container_as_user(yarn_username, "app_4", "container_1", + if (launch_container_as_user(yarn_username, "app_4", "container_1", container_dir, script_name, TEST_ROOT "/creds.txt", pid_file, local_dirs, log_dirs, "cgroups", cgroups_pids) != 0) { @@ -706,7 +719,7 @@ void test_run_container() { // effective user id. If executed by a super-user everything // gets tested. Here are different ways of execing the test binary: // 1. regular user assuming user == yarn user -// $ test-container-executor +// $ test-container-executor // 2. regular user with a given yarn user // $ test-container-executor yarn_user // 3. super user with a given user and assuming user == yarn user @@ -714,14 +727,16 @@ void test_run_container() { // 4. 
super user with a given user and a given yarn user // # test-container-executor user yarn_user int main(int argc, char **argv) { + int ret; LOGFILE = stdout; ERRORFILE = stderr; + printf("Attempting to clean up from any previous runs\n"); // clean up any junk from previous run if (system("chmod -R u=rwx " TEST_ROOT "; rm -fr " TEST_ROOT)) { exit(1); } - + if (mkdirs(TEST_ROOT "/logs/userlogs", 0755) != 0) { exit(1); } @@ -729,6 +744,9 @@ int main(int argc, char **argv) { if (write_config_file(TEST_ROOT "/test.cfg", 1) != 0) { exit(1); } + + printf("\nOur executable is %s\n",get_executable(argv[0])); + read_executor_config(TEST_ROOT "/test.cfg"); local_dirs = extract_values(strdup(NM_LOCAL_DIRS)); @@ -782,10 +800,36 @@ int main(int argc, char **argv) { test_check_user(0); +#ifdef __APPLE__ + printf("OS X: disabling CrashReporter\n"); + /* + * disable the "unexpectedly quit" dialog box + * because we know we're going to make our container + * do exactly that. + */ + CFStringRef crashType = CFSTR("DialogType"); + CFStringRef crashModeNone = CFSTR("None"); + CFStringRef crashAppID = CFSTR("com.apple.CrashReporter"); + CFStringRef crashOldMode = CFPreferencesCopyAppValue(CFSTR("DialogType"), CFSTR("com.apple.CrashReporter")); + + CFPreferencesSetAppValue(crashType, crashModeNone, crashAppID); + CFPreferencesAppSynchronize(crashAppID); +#endif + // the tests that change user need to be run in a subshell, so that // when they change user they don't give up our privs run_test_in_child("test_signal_container_group", test_signal_container_group); +#ifdef __APPLE__ + /* + * put the "unexpectedly quit" dialog back + */ + + CFPreferencesSetAppValue(crashType, crashOldMode, crashAppID); + CFPreferencesAppSynchronize(crashAppID); + printf("OS X: CrashReporter re-enabled\n"); +#endif + // init app and run container can't be run if you aren't testing as root if (getuid() == 0) { // these tests do internal forks so that the change_owner and execs @@ -794,7 +838,13 @@ int main(int argc, char **argv) { test_run_container(); } - seteuid(0); + /* + * try to seteuid(0). if it doesn't work, carry on anyway. + * we're going to capture the return value to get rid of a + * compiler warning. 
+ */ + ret=seteuid(0); + ret++; // test_delete_user must run as root since that's how we use the delete_as_user test_delete_user(); free_executor_configurations(); @@ -805,11 +855,19 @@ int main(int argc, char **argv) { } read_executor_config(TEST_ROOT "/test.cfg"); +#ifdef __APPLE__ + username = "_uucp"; + test_check_user(1); + + username = "_networkd"; + test_check_user(1); +#else username = "bin"; test_check_user(1); username = "sys"; test_check_user(1); +#endif run("rm -fr " TEST_ROOT); printf("\nFinished tests\n"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml index f99b316..5415848 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml @@ -19,12 +19,12 @@ hadoop-yarn-server org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-yarn-server-resourcemanager - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN ResourceManager diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java index 37cb76b..5371a21 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java @@ -673,7 +673,7 @@ private void refreshAll() throws ServiceFailedException { } refreshClusterMaxPriority(RefreshClusterMaxPriorityRequest.newInstance()); } catch (Exception ex) { - throw new ServiceFailedException(ex.getMessage()); + throw new ServiceFailedException("RefreshAll operation failed", ex); } } @@ -700,9 +700,10 @@ public AddToClusterNodeLabelsResponse addToClusterNodeLabels(AddToClusterNodeLab AddToClusterNodeLabelsResponse response = recordFactory.newRecordInstance(AddToClusterNodeLabelsResponse.class); try { - rmContext.getNodeLabelManager().addToCluserNodeLabels(request.getNodeLabels()); - RMAuditLogger - .logSuccess(user.getShortUserName(), operation, "AdminService"); + rmContext.getNodeLabelManager() + .addToCluserNodeLabels(request.getNodeLabels()); + RMAuditLogger.logSuccess(user.getShortUserName(), operation, + "AdminService"); return response; } catch (IOException ioe) { throw logAndWrapException(ioe, user.getShortUserName(), operation, msg); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index 33019ba..5957cb1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -1422,7 +1422,7 @@ public GetNodesToLabelsResponse getNodeToLabels( GetNodesToLabelsRequest request) throws YarnException, IOException { RMNodeLabelsManager labelsMgr = rmContext.getNodeLabelManager(); GetNodesToLabelsResponse response = - GetNodesToLabelsResponse.newInstance(labelsMgr.getNodeLabelsInfo()); + GetNodesToLabelsResponse.newInstance(labelsMgr.getNodeLabels()); return response; } @@ -1432,10 +1432,10 @@ public GetLabelsToNodesResponse getLabelsToNodes( RMNodeLabelsManager labelsMgr = rmContext.getNodeLabelManager(); if (request.getNodeLabels() == null || request.getNodeLabels().isEmpty()) { return GetLabelsToNodesResponse.newInstance( - labelsMgr.getLabelsInfoToNodes()); + labelsMgr.getLabelsToNodes()); } else { return GetLabelsToNodesResponse.newInstance( - labelsMgr.getLabelsInfoToNodes(request.getNodeLabels())); + labelsMgr.getLabelsToNodes(request.getNodeLabels())); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 7b1f2f9..469db2a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -252,15 +252,6 @@ protected void serviceInit(Configuration conf) throws Exception { this.rmContext.setHAEnabled(HAUtil.isHAEnabled(this.conf)); if (this.rmContext.isHAEnabled()) { HAUtil.verifyAndSetConfiguration(this.conf); - - // If the RM is configured to use an embedded leader elector, - // initialize the leader elector. - if (HAUtil.isAutomaticFailoverEnabled(conf) && - HAUtil.isAutomaticFailoverEmbedded(conf)) { - EmbeddedElector elector = createEmbeddedElector(); - addIfService(elector); - rmContext.setLeaderElectorService(elector); - } } // Set UGI and do login @@ -278,10 +269,27 @@ protected void serviceInit(Configuration conf) throws Exception { addIfService(rmDispatcher); rmContext.setDispatcher(rmDispatcher); + // The order of services below should not be changed as services will be + // started in same order + // As elector service needs admin service to be initialized and started, + // first we add admin service then elector service + adminService = createAdminService(); addService(adminService); rmContext.setRMAdminService(adminService); + // elector must be added post adminservice + if (this.rmContext.isHAEnabled()) { + // If the RM is configured to use an embedded leader elector, + // initialize the leader elector. 
+ if (HAUtil.isAutomaticFailoverEnabled(conf) + && HAUtil.isAutomaticFailoverEmbedded(conf)) { + EmbeddedElector elector = createEmbeddedElector(); + addIfService(elector); + rmContext.setLeaderElectorService(elector); + } + } + rmContext.setYarnConfiguration(conf); createAndInitActiveServices(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java index e513198..a81648b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/amlauncher/AMLauncher.java @@ -51,7 +51,6 @@ import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.client.NMProxy; -import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.EventHandler; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.ipc.YarnRPC; @@ -187,25 +186,12 @@ private ContainerLaunchContext createAMContainerLaunchContext( ContainerLaunchContext container = applicationMasterContext.getAMContainerSpec(); - // Populate the current queue name in the environment variable. - setupQueueNameEnv(container, applicationMasterContext); - // Finalize the container setupTokens(container, containerID); - + return container; } - private void setupQueueNameEnv(ContainerLaunchContext container, - ApplicationSubmissionContext applicationMasterContext) { - String queueName = applicationMasterContext.getQueue(); - if (queueName == null) { - queueName = YarnConfiguration.DEFAULT_QUEUE_NAME; - } - container.getEnvironment().put(ApplicationConstants.Environment - .YARN_RESOURCEMANAGER_APPLICATION_QUEUE.key(), queueName); - } - @Private @VisibleForTesting protected void setupTokens( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index 47c83bc..f0a4eb1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -257,6 +257,7 @@ protected synchronized void containerLaunchedOnNode( } application.containerLaunchedOnNode(containerId, node.getNodeID()); + node.containerStarted(containerId); } protected void containerIncreasedOnNode(ContainerId containerId, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java index 8a3fd8b..a494b7c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/SchedulerNode.java @@ -65,8 +65,8 @@ ResourceUtilization.newInstance(0, 0, 0f); /* set of containers that are allocated containers */ - protected final Map launchedContainers = - new HashMap(); + private final Map launchedContainers = + new HashMap<>(); private final RMNode rmNode; private final String nodeName; @@ -148,12 +148,24 @@ public String getRackName() { * @param rmContainer * allocated container */ - public synchronized void allocateContainer(RMContainer rmContainer) { + public void allocateContainer(RMContainer rmContainer) { + allocateContainer(rmContainer, false); + } + + /** + * The Scheduler has allocated containers on this node to the given + * application. + * @param rmContainer Allocated container + * @param launchedOnNode True if the container has been launched + */ + private synchronized void allocateContainer(RMContainer rmContainer, + boolean launchedOnNode) { Container container = rmContainer.getContainer(); deductAvailableResource(container.getResource()); ++numContainers; - launchedContainers.put(container.getId(), rmContainer); + launchedContainers.put(container.getId(), + new ContainerInfo(rmContainer, launchedOnNode)); LOG.info("Assigned container " + container.getId() + " of capacity " + container.getResource() + " on host " + rmNode.getNodeAddress() @@ -236,20 +248,25 @@ protected synchronized void updateResourceForReleasedContainer( /** * Release an allocated container on this node. * - * @param container - * container to be released + * @param containerId ID of container to be released. + * @param releasedByNode whether the release originates from a node update. */ - public synchronized void releaseContainer(Container container) { - if (!isValidContainer(container.getId())) { - LOG.error("Invalid container released " + container); + public synchronized void releaseContainer(ContainerId containerId, + boolean releasedByNode) { + ContainerInfo info = launchedContainers.get(containerId); + if (info == null) { return; } - /* remove the containers from the nodemanger */ - if (null != launchedContainers.remove(container.getId())) { - updateResourceForReleasedContainer(container); + if (!releasedByNode && info.launchedOnNode) { + // wait until node reports container has completed + return; } + launchedContainers.remove(containerId); + Container container = info.container.getContainer(); + updateResourceForReleasedContainer(container); + LOG.info("Released container " + container.getId() + " of capacity " + container.getResource() + " on host " + rmNode.getNodeAddress() + ", which currently has " + numContainers + " containers, " @@ -257,6 +274,17 @@ public synchronized void releaseContainer(Container container) { + " available" + ", release resources=" + true); } + /** + * Inform the node that a container has launched. 
+ * @param containerId ID of the launched container + */ + public synchronized void containerStarted(ContainerId containerId) { + ContainerInfo info = launchedContainers.get(containerId); + if (info != null) { + info.launchedOnNode = true; + } + } + private synchronized void addAvailableResource(Resource resource) { if (resource == null) { LOG.error("Invalid resource addition of null resource for " @@ -305,7 +333,25 @@ public int getNumContainers() { } public synchronized List getCopiedListOfRunningContainers() { - return new ArrayList(launchedContainers.values()); + List result = new ArrayList<>(launchedContainers.size()); + for (ContainerInfo info : launchedContainers.values()) { + result.add(info.container); + } + return result; + } + + /** + * Get the container for the specified container ID. + * @param containerId The container ID + * @return The container for the specified container ID + */ + protected synchronized RMContainer getContainer(ContainerId containerId) { + RMContainer container = null; + ContainerInfo info = launchedContainers.get(containerId); + if (info != null) { + container = info.container; + } + return container; } public synchronized RMContainer getReservedContainer() { @@ -321,7 +367,7 @@ public synchronized void recoverContainer(RMContainer rmContainer) { if (rmContainer.getState().equals(RMContainerState.COMPLETED)) { return; } - allocateContainer(rmContainer); + allocateContainer(rmContainer, true); } public Set getLabels() { @@ -377,4 +423,15 @@ public void setNodeUtilization(ResourceUtilization nodeUtilization) { public ResourceUtilization getNodeUtilization() { return this.nodeUtilization; } + + + private static class ContainerInfo { + private final RMContainer container; + private boolean launchedOnNode; + + public ContainerInfo(RMContainer container, boolean launchedOnNode) { + this.container = container; + this.launchedOnNode = launchedOnNode; + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java index ca4a2fd..94dfebd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/CapacityScheduler.java @@ -1074,6 +1074,7 @@ private synchronized void nodeUpdate(RMNode nm) { RMContainer container = getRMContainer(containerId); super.completedContainer(container, completedContainer, RMContainerEventType.FINISHED); + node.releaseContainer(containerId, true); if (container != null) { releasedContainers++; Resource rs = container.getAllocatedResource(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java index 86e8c09..77f10b9 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/LeafQueue.java @@ -1451,7 +1451,7 @@ public void completedContainer(Resource clusterResource, application.containerCompleted(rmContainer, containerStatus, event, node.getPartition()); - node.releaseContainer(container); + node.releaseContainer(rmContainer.getContainerId(), false); } // Book-keeping diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PartitionedQueueComparator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PartitionedQueueComparator.java index 477c615..72fe674 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PartitionedQueueComparator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PartitionedQueueComparator.java @@ -19,16 +19,20 @@ package org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity; import java.util.Comparator; +import java.util.function.Supplier; import org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager; public class PartitionedQueueComparator implements Comparator { - private String partitionToLookAt = null; + private ThreadLocal partitionToLookAt = new ThreadLocal<>(); + + PartitionedQueueComparator() { + partitionToLookAt.set(RMNodeLabelsManager.NO_LABEL); + } public void setPartitionToLookAt(String partitionToLookAt) { - this.partitionToLookAt = partitionToLookAt; + this.partitionToLookAt.set(partitionToLookAt); } - @Override public int compare(CSQueue q1, CSQueue q2) { @@ -36,11 +40,12 @@ public int compare(CSQueue q1, CSQueue q2) { * 1. Check accessible to given partition, if one queue accessible and * the other not, accessible queue goes first. */ + String p = partitionToLookAt.get(); boolean q1Accessible = - q1.getAccessibleNodeLabels().contains(partitionToLookAt) + q1.getAccessibleNodeLabels().contains(p) || q1.getAccessibleNodeLabels().contains(RMNodeLabelsManager.ANY); boolean q2Accessible = - q2.getAccessibleNodeLabels().contains(partitionToLookAt) + q2.getAccessibleNodeLabels().contains(p) || q2.getAccessibleNodeLabels().contains(RMNodeLabelsManager.ANY); if (q1Accessible && !q2Accessible) { return -1; @@ -53,12 +58,12 @@ public int compare(CSQueue q1, CSQueue q2) { * 2. 
When two queue has same accessibility, check who will go first: * Now we simply compare their used resource on the partition to lookAt */ - float used1 = q1.getQueueCapacities().getUsedCapacity(partitionToLookAt); - float used2 = q2.getQueueCapacities().getUsedCapacity(partitionToLookAt); + float used1 = q1.getQueueCapacities().getUsedCapacity(p); + float used2 = q2.getQueueCapacities().getUsedCapacity(p); if (Math.abs(used1 - used2) < 1e-6) { // When used capacity is same, compare their guaranteed-capacity - float cap1 = q1.getQueueCapacities().getCapacity(partitionToLookAt); - float cap2 = q2.getQueueCapacities().getCapacity(partitionToLookAt); + float cap1 = q1.getQueueCapacities().getCapacity(p); + float cap2 = q2.getQueueCapacities().getCapacity(p); // when cap1 == cap2, we will compare queue's name if (Math.abs(cap1 - cap2) < 1e-6) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java index f90a53c..078328c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerNode.java @@ -125,7 +125,7 @@ public synchronized void unreserveResource( // According to decisions from preemption policy, mark the container to killable public synchronized void markContainerToKillable(ContainerId containerId) { - RMContainer c = launchedContainers.get(containerId); + RMContainer c = getContainer(containerId); if (c != null && !killableContainers.containsKey(containerId)) { killableContainers.put(containerId, c); Resources.addTo(totalKillableResources, c.getAllocatedResource()); @@ -135,7 +135,7 @@ public synchronized void markContainerToKillable(ContainerId containerId) { // According to decisions from preemption policy, mark the container to // non-killable public synchronized void markContainerToNonKillable(ContainerId containerId) { - RMContainer c = launchedContainers.get(containerId); + RMContainer c = getContainer(containerId); if (c != null && killableContainers.containsKey(containerId)) { killableContainers.remove(containerId); Resources.subtractFrom(totalKillableResources, c.getAllocatedResource()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java index 960339e..1586a49 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/FairScheduler.java @@ -871,7 +871,7 @@ protected synchronized void 
completedContainerInternal( application.unreserve(rmContainer.getReservedPriority(), node); } else { application.containerCompleted(rmContainer, containerStatus, event); - node.releaseContainer(container); + node.releaseContainer(rmContainer.getContainerId(), false); updateRootQueueMetrics(); } @@ -1053,6 +1053,7 @@ private synchronized void nodeUpdate(RMNode nm) { LOG.debug("Container FINISHED: " + containerId); super.completedContainer(getRMContainer(containerId), completedContainer, RMContainerEventType.FINISHED); + node.releaseContainer(containerId, true); } // If the node is decommissioning, send an update to have the total diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java index 664bd4e..290271d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fifo/FifoScheduler.java @@ -746,6 +746,7 @@ private synchronized void nodeUpdate(RMNode rmNode) { LOG.debug("Container FINISHED: " + containerId); super.completedContainer(getRMContainer(containerId), completedContainer, RMContainerEventType.FINISHED); + node.releaseContainer(containerId, true); } // Updating node resource utilization @@ -917,7 +918,7 @@ protected synchronized void completedContainerInternal( RMNodeLabelsManager.NO_LABEL); // Inform the node - node.releaseContainer(container); + node.releaseContainer(rmContainer.getContainerId(), false); // Update total usage Resources.subtractFrom(usedResource, container.getResource()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java index 99ad9f6..9e959235 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestApplicationMasterLauncher.java @@ -94,7 +94,6 @@ String nmHostAtContainerManager = null; long submitTimeAtContainerManager; int maxAppAttempts; - private String queueName; @Override public StartContainersResponse @@ -124,8 +123,6 @@ Long.parseLong(env.get(ApplicationConstants.APP_SUBMIT_TIME_ENV)); maxAppAttempts = Integer.parseInt(env.get(ApplicationConstants.MAX_APP_ATTEMPTS_ENV)); - queueName = env.get(ApplicationConstants.Environment - .YARN_RESOURCEMANAGER_APPLICATION_QUEUE.key()); return StartContainersResponse.newInstance( new HashMap(), new ArrayList(), new HashMap()); @@ -194,8 +191,6 @@ public void testAMLaunchAndCleanup() throws Exception { containerManager.nmHostAtContainerManager); 
Assert.assertEquals(YarnConfiguration.DEFAULT_RM_AM_MAX_ATTEMPTS, containerManager.maxAppAttempts); - Assert.assertEquals(YarnConfiguration.DEFAULT_QUEUE_NAME, - containerManager.queueName); MockAM am = new MockAM(rm.getRMContext(), rm .getApplicationMasterService(), appAttemptId); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java index 12d4aba..5f6aa03 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestClientRMService.java @@ -1511,27 +1511,19 @@ protected ClientRMService createClientRMService() { // Get node labels mapping GetNodesToLabelsResponse response1 = client .getNodeToLabels(GetNodesToLabelsRequest.newInstance()); - Map> nodeToLabels = response1.getNodeToLabels(); + Map> nodeToLabels = response1.getNodeToLabels(); Assert.assertTrue(nodeToLabels.keySet().containsAll( Arrays.asList(node1, node2))); Assert.assertTrue(nodeToLabels.get(node1) - .containsAll(Arrays.asList(labelX))); + .containsAll(Arrays.asList(labelX.getName()))); Assert.assertTrue(nodeToLabels.get(node2) - .containsAll(Arrays.asList(labelY))); - // Verify whether labelX's exclusivity is false - for (NodeLabel x : nodeToLabels.get(node1)) { - Assert.assertFalse(x.isExclusive()); - } - // Verify whether labelY's exclusivity is true - for (NodeLabel y : nodeToLabels.get(node2)) { - Assert.assertTrue(y.isExclusive()); - } + .containsAll(Arrays.asList(labelY.getName()))); // Below label "x" is not present in the response as exclusivity is true Assert.assertFalse(nodeToLabels.get(node1).containsAll( Arrays.asList(NodeLabel.newInstance("x")))); rpc.stopProxy(client, conf); - rm.close(); + rm.stop(); } @Test @@ -1582,18 +1574,14 @@ protected ClientRMService createClientRMService() { // Get labels to nodes mapping GetLabelsToNodesResponse response1 = client .getLabelsToNodes(GetLabelsToNodesRequest.newInstance()); - Map> labelsToNodes = response1.getLabelsToNodes(); - // Verify whether all NodeLabel's exclusivity are false - for (Map.Entry> nltn : labelsToNodes.entrySet()) { - Assert.assertFalse(nltn.getKey().isExclusive()); - } + Map> labelsToNodes = response1.getLabelsToNodes(); Assert.assertTrue(labelsToNodes.keySet().containsAll( - Arrays.asList(labelX, labelY, labelZ))); - Assert.assertTrue(labelsToNodes.get(labelX).containsAll( + Arrays.asList(labelX.getName(), labelY.getName(), labelZ.getName()))); + Assert.assertTrue(labelsToNodes.get(labelX.getName()).containsAll( Arrays.asList(node1A))); - Assert.assertTrue(labelsToNodes.get(labelY).containsAll( + Assert.assertTrue(labelsToNodes.get(labelY.getName()).containsAll( Arrays.asList(node2A, node3A))); - Assert.assertTrue(labelsToNodes.get(labelZ).containsAll( + Assert.assertTrue(labelsToNodes.get(labelZ.getName()).containsAll( Arrays.asList(node1B, node3B))); // Get labels to nodes mapping for specific labels @@ -1602,17 +1590,13 @@ protected ClientRMService createClientRMService() { GetLabelsToNodesResponse response2 = client 
.getLabelsToNodes(GetLabelsToNodesRequest.newInstance(setlabels)); labelsToNodes = response2.getLabelsToNodes(); - // Verify whether all NodeLabel's exclusivity are false - for (Map.Entry> nltn : labelsToNodes.entrySet()) { - Assert.assertFalse(nltn.getKey().isExclusive()); - } Assert.assertTrue(labelsToNodes.keySet().containsAll( - Arrays.asList(labelX, labelZ))); - Assert.assertTrue(labelsToNodes.get(labelX).containsAll( + Arrays.asList(labelX.getName(), labelZ.getName()))); + Assert.assertTrue(labelsToNodes.get(labelX.getName()).containsAll( Arrays.asList(node1A))); - Assert.assertTrue(labelsToNodes.get(labelZ).containsAll( + Assert.assertTrue(labelsToNodes.get(labelZ.getName()).containsAll( Arrays.asList(node1B, node3B))); - Assert.assertEquals(labelsToNodes.get(labelY), null); + Assert.assertEquals(labelsToNodes.get(labelY.getName()), null); rpc.stopProxy(client, conf); rm.close(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java index 8fa26dc..6a118ac 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/TestAbstractYarnScheduler.java @@ -35,6 +35,7 @@ import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; +import org.apache.hadoop.yarn.api.records.ContainerStatus; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceOption; @@ -436,6 +437,119 @@ public void testReleasedContainerIfAppAttemptisNull() throws Exception { } } + @Test(timeout=60000) + public void testContainerReleasedByNode() throws Exception { + System.out.println("Starting testContainerReleasedByNode"); + configureScheduler(); + YarnConfiguration conf = getConf(); + MockRM rm1 = new MockRM(conf); + try { + rm1.start(); + RMApp app1 = + rm1.submitApp(200, "name", "user", + new HashMap(), false, "default", + -1, null, "Test", false, true); + MockNM nm1 = + new MockNM("127.0.0.1:1234", 10240, rm1.getResourceTrackerService()); + nm1.registerNode(); + + MockAM am1 = MockRM.launchAndRegisterAM(app1, rm1, nm1); + + // allocate a container that fills more than half the node + am1.allocate("127.0.0.1", 8192, 1, new ArrayList()); + nm1.nodeHeartbeat(true); + + // wait for containers to be allocated. 
+ List containers = + am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers(); + while (containers.isEmpty()) { + Thread.sleep(10); + nm1.nodeHeartbeat(true); + containers = am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers(); + } + + // release the container from the AM + ContainerId cid = containers.get(0).getId(); + List releasedContainers = new ArrayList<>(1); + releasedContainers.add(cid); + List completedContainers = am1.allocate( + new ArrayList(), releasedContainers) + .getCompletedContainersStatuses(); + while (completedContainers.isEmpty()) { + Thread.sleep(10); + completedContainers = am1.allocate( + new ArrayList(), releasedContainers) + .getCompletedContainersStatuses(); + } + + // verify new container can be allocated immediately because container + // never launched on the node + containers = am1.allocate("127.0.0.1", 8192, 1, + new ArrayList()).getAllocatedContainers(); + nm1.nodeHeartbeat(true); + while (containers.isEmpty()) { + Thread.sleep(10); + nm1.nodeHeartbeat(true); + containers = am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers(); + } + + // launch the container on the node + cid = containers.get(0).getId(); + nm1.nodeHeartbeat(cid.getApplicationAttemptId(), cid.getContainerId(), + ContainerState.RUNNING); + rm1.waitForState(nm1, cid, RMContainerState.RUNNING); + + // release the container from the AM + releasedContainers.clear(); + releasedContainers.add(cid); + completedContainers = am1.allocate( + new ArrayList(), releasedContainers) + .getCompletedContainersStatuses(); + while (completedContainers.isEmpty()) { + Thread.sleep(10); + completedContainers = am1.allocate( + new ArrayList(), releasedContainers) + .getCompletedContainersStatuses(); + } + + // verify new container cannot be allocated immediately because container + // has not been released by the node + containers = am1.allocate("127.0.0.1", 8192, 1, + new ArrayList()).getAllocatedContainers(); + nm1.nodeHeartbeat(true); + Assert.assertTrue("new container allocated before node freed old", + containers.isEmpty()); + for (int i = 0; i < 10; ++i) { + Thread.sleep(10); + containers = am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers(); + nm1.nodeHeartbeat(true); + Assert.assertTrue("new container allocated before node freed old", + containers.isEmpty()); + } + + // free the old container from the node + nm1.nodeHeartbeat(cid.getApplicationAttemptId(), cid.getContainerId(), + ContainerState.COMPLETE); + + // verify new container is now allocated + containers = am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers(); + while (containers.isEmpty()) { + Thread.sleep(10); + nm1.nodeHeartbeat(true); + containers = am1.allocate(new ArrayList(), + new ArrayList()).getAllocatedContainers(); + } + } finally { + rm1.stop(); + System.out.println("Stopping testContainerReleasedByNode"); + } + } + @Test(timeout = 60000) public void testResourceRequestRestoreWhenRMContainerIsAtAllocated() throws Exception { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java index 98d3dfb..db8ec886 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestChildQueueOrder.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyBoolean; import static org.mockito.Matchers.eq; import static org.mockito.Mockito.doAnswer; import static org.mockito.Mockito.doNothing; @@ -159,7 +160,8 @@ public CSAssignment answer(InvocationOnMock invocation) throws Throwable { }). when(queue).assignContainers(eq(clusterResource), eq(node), any(ResourceLimits.class), any(SchedulingMode.class)); - doNothing().when(node).releaseContainer(any(Container.class)); + doNothing().when(node).releaseContainer(any(ContainerId.class), + anyBoolean()); } @@ -230,7 +232,8 @@ public void testSortedQueues() throws Exception { FiCaSchedulerNode node_0 = TestUtils.getMockNode("host_0", DEFAULT_RACK, 0, memoryPerNode*GB); - doNothing().when(node_0).releaseContainer(any(Container.class)); + doNothing().when(node_0).releaseContainer(any(ContainerId.class), + anyBoolean()); final Resource clusterResource = Resources.createResource(numNodes * (memoryPerNode*GB), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-sharedcachemanager/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-sharedcachemanager/pom.xml index d0eb760..585cb54 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-sharedcachemanager/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-sharedcachemanager/pom.xml @@ -17,11 +17,11 @@ hadoop-yarn-server org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 org.apache.hadoop hadoop-yarn-server-sharedcachemanager - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN SharedCacheManager diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/pom.xml index f63c83f..54c9d77 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/pom.xml @@ -19,11 +19,11 @@ hadoop-yarn-server org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 org.apache.hadoop hadoop-yarn-server-tests - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN Server Tests diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/pom.xml index d27d228..bd6b260 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/pom.xml @@ -22,12 +22,12 @@ hadoop-yarn-server org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-yarn-server-timeline-pluginstorage - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN Timeline Plugin Storage diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/pom.xml index a45e1d9..17bd705 
100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/pom.xml @@ -19,12 +19,12 @@ hadoop-yarn-server org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-yarn-server-web-proxy - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN Web Proxy diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/pom.xml index d83ac24..86c9aba 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/pom.xml @@ -19,12 +19,12 @@ hadoop-yarn org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-yarn-server - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN Server pom diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/pom.xml index 0bcf2fb..07c6fe9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/pom.xml @@ -19,12 +19,12 @@ hadoop-yarn org.apache.hadoop - 2.8.0-SNAPSHOT + 2.8.0 4.0.0 org.apache.hadoop hadoop-yarn-site - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop YARN Site pom diff --git a/hadoop-yarn-project/hadoop-yarn/pom.xml b/hadoop-yarn-project/hadoop-yarn/pom.xml index 06b4a15..d5d1f09 100644 --- a/hadoop-yarn-project/hadoop-yarn/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/pom.xml @@ -16,12 +16,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../../hadoop-project org.apache.hadoop hadoop-yarn - 2.8.0-SNAPSHOT + 2.8.0 pom Apache Hadoop YARN diff --git a/hadoop-yarn-project/pom.xml b/hadoop-yarn-project/pom.xml index 4420753..ac6248b 100644 --- a/hadoop-yarn-project/pom.xml +++ b/hadoop-yarn-project/pom.xml @@ -18,12 +18,12 @@ org.apache.hadoop hadoop-project - 2.8.0-SNAPSHOT + 2.8.0 ../hadoop-project org.apache.hadoop hadoop-yarn-project - 2.8.0-SNAPSHOT + 2.8.0 pom Apache Hadoop YARN Project http://hadoop.apache.org/yarn/ diff --git a/pom.xml b/pom.xml index acf4c04..8d70fe4 100644 --- a/pom.xml +++ b/pom.xml @@ -18,7 +18,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs 4.0.0 org.apache.hadoop hadoop-main - 2.8.0-SNAPSHOT + 2.8.0 Apache Hadoop Main Apache Hadoop Main pom
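Note on the scheduler changes above: the edits to SchedulerNode, CapacityScheduler, FairScheduler, FifoScheduler, and the new testContainerReleasedByNode test all revolve around one piece of bookkeeping. Each allocated container is now stored with a launched-on-node flag; when an ApplicationMaster releases a container that has already launched on the node, the node's resources are not credited back until the node itself reports the container as complete, while a container that never launched can be freed immediately. The sketch below illustrates that bookkeeping in isolation. It is a simplified illustration, not the actual Hadoop classes: the class and method names are hypothetical, container ids are plain Strings instead of ContainerId, and resource accounting is reduced to an integer MB counter.

import java.util.HashMap;
import java.util.Map;

/**
 * Simplified sketch of the launched-on-node tracking added to SchedulerNode:
 * remember whether a container ever started on the node, and defer freeing
 * its resources on an AM-initiated release until the node confirms completion.
 */
class NodeContainerTrackerSketch {

  /** Pairs a container with a launched-on-node flag (cf. ContainerInfo in the patch). */
  private static final class ContainerInfo {
    boolean launchedOnNode;
    ContainerInfo(boolean launchedOnNode) {
      this.launchedOnNode = launchedOnNode;
    }
  }

  private final Map<String, ContainerInfo> launchedContainers = new HashMap<>();
  private int availableMB;

  NodeContainerTrackerSketch(int availableMB) {
    this.availableMB = availableMB;
  }

  /** The scheduler allocated a container of the given size on this node. */
  synchronized void allocate(String containerId, int memMB) {
    launchedContainers.put(containerId, new ContainerInfo(false));
    availableMB -= memMB;
  }

  /** A node heartbeat reported that the container process has started. */
  synchronized void containerStarted(String containerId) {
    ContainerInfo info = launchedContainers.get(containerId);
    if (info != null) {
      info.launchedOnNode = true;
    }
  }

  /**
   * Release a container. If the release did not come from the node and the
   * container has already launched there, keep holding the resources until
   * the node itself reports completion (a later call with releasedByNode=true).
   */
  synchronized void release(String containerId, int memMB, boolean releasedByNode) {
    ContainerInfo info = launchedContainers.get(containerId);
    if (info == null) {
      return; // unknown or already released
    }
    if (!releasedByNode && info.launchedOnNode) {
      return; // wait for the node heartbeat to free the resources
    }
    launchedContainers.remove(containerId);
    availableMB += memMB;
  }

  synchronized int getAvailableMB() {
    return availableMB;
  }
}

Deferring the resource credit until the node confirms completion keeps the scheduler from handing out capacity the NodeManager is still reclaiming, which is the behavior the new testContainerReleasedByNode test exercises: a never-launched container can be reallocated immediately, while a launched one blocks new allocations until the node heartbeats the container as COMPLETE.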