diff --git a/.gitignore b/.gitignore index 6ece6ca..70767ba 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ hadoop-common-project/hadoop-common/src/test/resources/contract-test-options.xml hadoop-tools/hadoop-openstack/src/test/resources/contract-test-options.xml hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/src/main/tla/yarnregistry.toolbox yarnregistry.pdf +hadoop-tools/hadoop-aws/src/test/resources/contract-test-options.xml diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md index bc66e67..444fb60 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/testing.md @@ -28,53 +28,6 @@ These filesystem bindings must be defined in an XML configuration file, usually `hadoop-common-project/hadoop-common/src/test/resources/contract-test-options.xml`. This file is excluded should not be checked in. -### s3:// - -In `contract-test-options.xml`, the filesystem name must be defined in the property `fs.contract.test.fs.s3`. The standard configuration options to define the S3 authentication details must also be provided. - -Example: - - - - fs.contract.test.fs.s3 - s3://tests3hdfs/ - - - - fs.s3.awsAccessKeyId - DONOTPCOMMITTHISKEYTOSCM - - - - fs.s3.awsSecretAccessKey - DONOTEVERSHARETHISSECRETKEY! - - - -### s3n:// - - -In `contract-test-options.xml`, the filesystem name must be defined in the property `fs.contract.test.fs.s3n`. The standard configuration options to define the S3N authentication details muse also be provided. - -Example: - - - - - fs.contract.test.fs.s3n - s3n://tests3contract - - - - fs.s3n.awsAccessKeyId - DONOTPCOMMITTHISKEYTOSCM - - - - fs.s3n.awsSecretAccessKey - DONOTEVERSHARETHISSECRETKEY! 
- - ### ftp:// diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java index 36f2fdb..c14a002 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemContractBaseTest.java @@ -464,7 +464,7 @@ protected void createFile(Path path) throws IOException { out.close(); } - private void rename(Path src, Path dst, boolean renameSucceeded, + protected void rename(Path src, Path dst, boolean renameSucceeded, boolean srcExists, boolean dstExists) throws IOException { assertEquals("Rename result", renameSucceeded, fs.rename(src, dst)); assertEquals("Source exists", srcExists, fs.exists(src)); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractDeleteTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractDeleteTest.java index c90efd1..2bd60ca 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractDeleteTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractDeleteTest.java @@ -19,6 +19,7 @@ package org.apache.hadoop.fs.contract; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileSystem; import org.junit.Test; import java.io.IOException; @@ -94,4 +95,30 @@ public void testDeleteNonEmptyDirRecursive() throws Throwable { ContractTestUtils.assertPathDoesNotExist(getFileSystem(), "not deleted", file); } + @Test + public void testDeleteDeepEmptyDir() throws Throwable { + mkdirs(path("testDeleteDeepEmptyDir/d1/d2/d3/d4")); + assertDeleted(path("testDeleteDeepEmptyDir/d1/d2/d3"), true); + + FileSystem fs = getFileSystem(); + ContractTestUtils.assertPathDoesNotExist(fs, + "not deleted", path("testDeleteDeepEmptyDir/d1/d2/d3/d4")); + ContractTestUtils.assertPathDoesNotExist(fs, + "not deleted", path("testDeleteDeepEmptyDir/d1/d2/d3")); + ContractTestUtils.assertPathExists(fs, "parent dir is deleted", + path("testDeleteDeepEmptyDir/d1/d2")); + } + + @Test + public void testDeleteSingleFile() throws Throwable { + // Test delete of just a file + Path path = path("testDeleteSingleFile/d1/d2"); + mkdirs(path); + Path file = new Path(path, "childfile"); + ContractTestUtils.writeTextFile(getFileSystem(), file, + "single file to be deleted.", true); + ContractTestUtils.assertPathExists(getFileSystem(), + "single file not created", file); + assertDeleted(file, false); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java index dad3b7f..86fd61f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java @@ -112,4 +112,23 @@ public void testMkdirOverParentFile() throws Throwable { assertPathExists("mkdir failed", path); assertDeleted(path, true); } + + @Test + public void testMkdirSlashHandling() throws Throwable { + describe("verify mkdir slash handling"); + FileSystem fs = getFileSystem(); + + // No trailing slash + 
assertTrue(fs.mkdirs(path("testmkdir/a"))); + assertPathExists("mkdir without trailing slash failed", + path("testmkdir/a")); + + // With trailing slash + assertTrue(fs.mkdirs(path("testmkdir/b/"))); + assertPathExists("mkdir with trailing slash failed", path("testmkdir/b/")); + + // Mismatched slashes + assertPathExists("check path existence without trailing slash failed", + path("testmkdir/b")); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java index 32f27a7..04c444d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractRenameTest.java @@ -182,4 +182,45 @@ public void testRenameFileNonexistentDir() throws Throwable { assertFalse(renameCreatesDestDirs); } } + + @Test + public void testRenameWithNonEmptySubDir() throws Throwable { + final Path renameTestDir = path("testRenameWithNonEmptySubDir"); + final Path srcDir = new Path(renameTestDir, "src1"); + final Path srcSubDir = new Path(srcDir, "sub"); + final Path finalDir = new Path(renameTestDir, "dest"); + FileSystem fs = getFileSystem(); + boolean renameRemoveEmptyDest = isSupported(RENAME_REMOVE_DEST_IF_EMPTY_DIR); + ContractTestUtils.rm(fs, renameTestDir, true, false); + + fs.mkdirs(srcDir); + fs.mkdirs(finalDir); + ContractTestUtils.writeTextFile(fs, new Path(srcDir, "source.txt"), + "this is the file in src dir", false); + ContractTestUtils.writeTextFile(fs, new Path(srcSubDir, "subfile.txt"), + "this is the file in src/sub dir", false); + + ContractTestUtils.assertPathExists(fs, "not created in src dir", + new Path(srcDir, "source.txt")); + ContractTestUtils.assertPathExists(fs, "not created in src/sub dir", + new Path(srcSubDir, "subfile.txt")); + + fs.rename(srcDir, finalDir); + // Accept both POSIX rename behavior and CLI rename behavior + if (renameRemoveEmptyDest) { + // POSIX rename behavior + ContractTestUtils.assertPathExists(fs, "not renamed into dest dir", + new Path(finalDir, "source.txt")); + ContractTestUtils.assertPathExists(fs, "not renamed into dest/sub dir", + new Path(finalDir, "sub/subfile.txt")); + } else { + // CLI rename behavior + ContractTestUtils.assertPathExists(fs, "not renamed into dest dir", + new Path(finalDir, "src1/source.txt")); + ContractTestUtils.assertPathExists(fs, "not renamed into dest/sub dir", + new Path(finalDir, "src1/sub/subfile.txt")); + } + ContractTestUtils.assertPathDoesNotExist(fs, "not deleted", + new Path(srcDir, "source.txt")); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractOptions.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractOptions.java index 61279b0..d9427c6 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractOptions.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractOptions.java @@ -80,6 +80,13 @@ "rename-returns-false-if-source-missing"; /** + * Flag to indicate that the FS remove dest first if it is an empty directory + * mean the FS honors POSIX rename behavior. 
+ * @{value} + */ + String RENAME_REMOVE_DEST_IF_EMPTY_DIR = "rename-remove-dest-if-empty-dir"; + + /** * Flag to indicate that append is supported * @{value} */ diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java index cd9cc1b..3f16724 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java @@ -31,8 +31,11 @@ import java.io.EOFException; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; import java.util.Arrays; import java.util.Properties; +import java.util.UUID; /** * Utilities used across test cases @@ -44,6 +47,13 @@ public static final String IO_FILE_BUFFER_SIZE = "io.file.buffer.size"; + // For scale testing, we can repeatedly write small chunk data to generate + // a large file. + public static final String IO_CHUNK_BUFFER_SIZE = "io.chunk.buffer.size"; + public static final int DEFAULT_IO_CHUNK_BUFFER_SIZE = 128; + public static final String IO_CHUNK_MODULUS_SIZE = "io.chunk.modulus.size"; + public static final int DEFAULT_IO_CHUNK_MODULUS_SIZE = 128; + /** * Assert that a property in the property set matches the expected value * @param props property set @@ -755,5 +765,134 @@ public static void validateFileContent(byte[] concat, byte[][] bytes) { mismatch); } + /** + * Receives test data from the given input file and checks the size of the + * data as well as the pattern inside the received data. + * + * @param fs FileSystem + * @param path Input file to be checked + * @param expectedSize the expected size of the data to be read from the + * input file in bytes + * @param bufferLen Pattern length + * @param modulus Pattern modulus + * @throws IOException + * thrown if an error occurs while reading the data + */ + public static void verifyReceivedData(FileSystem fs, Path path, + final long expectedSize, + final int bufferLen, + final int modulus) throws IOException { + final byte[] testBuffer = new byte[bufferLen]; + + long totalBytesRead = 0; + int nextExpectedNumber = 0; + final InputStream inputStream = fs.open(path); + try { + while (true) { + final int bytesRead = inputStream.read(testBuffer); + if (bytesRead < 0) { + break; + } + + totalBytesRead += bytesRead; + + for (int i = 0; i < bytesRead; ++i) { + if (testBuffer[i] != nextExpectedNumber) { + throw new IOException("Read number " + testBuffer[i] + + " but expected " + nextExpectedNumber); + } + + ++nextExpectedNumber; + if (nextExpectedNumber == modulus) { + nextExpectedNumber = 0; + } + } + } + + if (totalBytesRead != expectedSize) { + throw new IOException("Expected to read " + expectedSize + + " bytes but only received " + totalBytesRead); + } + } finally { + inputStream.close(); + } + } + + /** + * Generates test data of the given size according to some specific pattern + * and writes it to the provided output file. 
+ * + * @param fs FileSystem + * @param path Test file to be generated + * @param size The size of the test data to be generated in bytes + * @param bufferLen Pattern length + * @param modulus Pattern modulus + * @throws IOException + * thrown if an error occurs while writing the data + */ + public static long generateTestFile(FileSystem fs, Path path, + final long size, + final int bufferLen, + final int modulus) throws IOException { + final byte[] testBuffer = new byte[bufferLen]; + for (int i = 0; i < testBuffer.length; ++i) { + testBuffer[i] = (byte) (i % modulus); + } + + final OutputStream outputStream = fs.create(path, false); + long bytesWritten = 0; + try { + while (bytesWritten < size) { + final long diff = size - bytesWritten; + if (diff < testBuffer.length) { + outputStream.write(testBuffer, 0, (int) diff); + bytesWritten += diff; + } else { + outputStream.write(testBuffer); + bytesWritten += testBuffer.length; + } + } + + return bytesWritten; + } finally { + outputStream.close(); + } + } + + /** + * Creates and reads a file with the given size. The test file is generated + * according to a specific pattern so it can be easily verified even if it's + * a multi-GB one. + * During the read phase the incoming data stream is also checked against + * this pattern. + * + * @param fs FileSystem + * @param parent Test file parent dir path + * @throws IOException + * thrown if an I/O error occurs while writing or reading the test file + */ + public static void createAndVerifyFile(FileSystem fs, Path parent, final long fileSize) + throws IOException { + int testBufferSize = fs.getConf() + .getInt(IO_CHUNK_BUFFER_SIZE, DEFAULT_IO_CHUNK_BUFFER_SIZE); + int modulus = fs.getConf() + .getInt(IO_CHUNK_MODULUS_SIZE, DEFAULT_IO_CHUNK_MODULUS_SIZE); + + final String objectName = UUID.randomUUID().toString(); + final Path objectPath = new Path(parent, objectName); + + // Write test file in a specific pattern + assertEquals(fileSize, + generateTestFile(fs, objectPath, fileSize, testBufferSize, modulus)); + assertPathExists(fs, "not created successful", objectPath); + + // Now read the same file back and verify its content + try { + verifyReceivedData(fs, objectPath, fileSize, testBufferSize, modulus); + } finally { + // Delete test file + fs.delete(objectPath, false); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/resources/contract/localfs.xml b/hadoop-common-project/hadoop-common/src/test/resources/contract/localfs.xml index b8857eb..38d68b3 100644 --- a/hadoop-common-project/hadoop-common/src/test/resources/contract/localfs.xml +++ b/hadoop-common-project/hadoop-common/src/test/resources/contract/localfs.xml @@ -57,6 +57,10 @@ case sensitivity and permission options are determined at run time from OS type true + + fs.contract.rename-remove-dest-if-empty-dir + true + + +# Hadoop-AWS module: Integration with Amazon Web Services + +The `hadoop-aws` module provides support for AWS integration. The generated +JAR file, `hadoop-aws.jar` also declares a transitive dependency on all +external artifacts which are needed for this support —enabling downstream +applications to easily use this support. + +Features + +1. The "classic" `s3:` filesystem for storing objects in Amazon S3 Storage +1. The second-generation, `s3n:` filesystem, making it easy to share +data between hadoop and other applications via the S3 object store +1. The third generation, `s3a:` filesystem. 
Designed to be a drop-in
+replacement for `s3n:`, this filesystem binding supports larger files and promises
+higher performance.
+
+The specifics of using these filesystems are documented below.
+
+## Warning: Object Stores are not filesystems.
+
+Amazon S3 is an example of "an object store". In order to achieve scalability
+and especially high availability, S3 has —as many other cloud object stores have
+done— relaxed some of the constraints which classic "POSIX" filesystems promise.
+
+Specifically:
+
+1. Files that are newly created from the Hadoop Filesystem APIs may not be
+immediately visible.
+2. File delete and update operations may not immediately propagate. Old
+copies of the file may exist for an indeterminate time period.
+3. Directory operations: `delete()` and `rename()` are implemented by
+recursive file-by-file operations. They take time at least proportional to
+the number of files, during which time partial updates may be visible. If
+the operations are interrupted, the filesystem is left in an intermediate state.
+
+For further discussion on these topics, please consult
+[The Hadoop FileSystem API Definition](/filesystem).
+
+## Warning #2: your AWS credentials are valuable
+
+Your AWS credentials not only pay for services, they offer read and write
+access to the data. Anyone with the credentials can not only read your datasets
+—they can delete them.
+
+Do not inadvertently share these credentials through means such as:
+
+1. Checking in Hadoop configuration files containing the credentials.
+1. Logging them to a console, as they invariably end up being seen.
+
+If you do any of these: change your credentials immediately!
+
+
+## S3
+
+### Authentication properties
+
+    <property>
+      <name>fs.s3.awsAccessKeyId</name>
+      <description>AWS access key ID</description>
+    </property>
+
+    <property>
+      <name>fs.s3.awsSecretAccessKey</name>
+      <description>AWS secret key</description>
+    </property>
+
+
+## S3N
+
+### Authentication properties
+
+    <property>
+      <name>fs.s3n.awsAccessKeyId</name>
+      <description>AWS access key ID</description>
+    </property>
+
+    <property>
+      <name>fs.s3n.awsSecretAccessKey</name>
+      <description>AWS secret key</description>
+    </property>
+
+### Other properties
+
+    <property>
+      <name>fs.s3n.block.size</name>
+      <value>67108864</value>
+      <description>Block size to use when reading files using the native S3
+      filesystem (s3n: URIs).</description>
+    </property>
+
+    <property>
+      <name>fs.s3n.multipart.uploads.enabled</name>
+      <value>false</value>
+      <description>Setting this property to true enables multiple uploads to
+      native S3 filesystem. When uploading a file, it is split into blocks
+      if the size is larger than fs.s3n.multipart.uploads.block.size.
+      </description>
+    </property>
+
+    <property>
+      <name>fs.s3n.multipart.uploads.block.size</name>
+      <value>67108864</value>
+      <description>The block size for multipart uploads to native S3 filesystem.
+      Default size is 64MB.
+      </description>
+    </property>
+
+    <property>
+      <name>fs.s3n.multipart.copy.block.size</name>
+      <value>5368709120</value>
+      <description>The block size for multipart copy in native S3 filesystem.
+      Default size is 5GB.
+      </description>
+    </property>
+
+    <property>
+      <name>fs.s3n.server-side-encryption-algorithm</name>
+      <value></value>
+      <description>Specify a server-side encryption algorithm for S3.
+      The default is NULL, and the only other currently allowable value is AES256.
+      </description>
+    </property>
+
+## S3A
+
+
+### Authentication properties
+
+    <property>
+      <name>fs.s3a.awsAccessKeyId</name>
+      <description>AWS access key ID. Omit for Role-based authentication.</description>
+    </property>
+
+    <property>
+      <name>fs.s3a.awsSecretAccessKey</name>
+      <description>AWS secret key. Omit for Role-based authentication.</description>
+    </property>
+
+### Other properties
+
+    <property>
+      <name>fs.s3a.connection.maximum</name>
+      <value>15</value>
+      <description>Controls the maximum number of simultaneous connections to S3.</description>
+    </property>
+
+    <property>
+      <name>fs.s3a.connection.ssl.enabled</name>
+      <value>true</value>
+      <description>Enables or disables SSL connections to S3.</description>
+    </property>
+
+    <property>
+      <name>fs.s3a.attempts.maximum</name>
+      <value>10</value>
+      <description>How many times we should retry commands on transient errors.</description>
+    </property>
+
+    <property>
+      <name>fs.s3a.connection.timeout</name>
+      <value>5000</value>
+      <description>Socket connection timeout in seconds.</description>
+    </property>
+
+    <property>
+      <name>fs.s3a.paging.maximum</name>
+      <value>5000</value>
+      <description>How many keys to request from S3 when doing
+         directory listings at a time.</description>
+    </property>
+
+    <property>
+      <name>fs.s3a.multipart.size</name>
+      <value>104857600</value>
+      <description>How big (in bytes) to split upload or copy operations up into.</description>
+    </property>
+
+    <property>
+      <name>fs.s3a.multipart.threshold</name>
+      <value>2147483647</value>
+      <description>Threshold before uploads or copies use parallel multipart operations.</description>
+    </property>
+
+    <property>
+      <name>fs.s3a.acl.default</name>
+      <description>Set a canned ACL for newly created and copied objects. Value may be private,
+         public-read, public-read-write, authenticated-read, log-delivery-write,
+         bucket-owner-read, or bucket-owner-full-control.</description>
+    </property>
+
+    <property>
+      <name>fs.s3a.multipart.purge</name>
+      <value>false</value>
+      <description>True if you want to purge existing multipart uploads that may not have been
+         completed/aborted correctly</description>
+    </property>
+
+    <property>
+      <name>fs.s3a.multipart.purge.age</name>
+      <value>86400</value>
+      <description>Minimum age in seconds of multipart uploads to purge</description>
+    </property>
+
+    <property>
+      <name>fs.s3a.buffer.dir</name>
+      <value>${hadoop.tmp.dir}/s3a</value>
+      <description>Comma separated list of directories that will be used to buffer file
+        uploads to.</description>
+    </property>
+
+    <property>
+      <name>fs.s3a.impl</name>
+      <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
+      <description>The implementation class of the S3A Filesystem</description>
+    </property>
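+
+As an illustration of how the settings above fit together, here is a minimal
+sketch of client code against an S3A bucket. It assumes the credentials and the
+`fs.s3a.impl` binding are already configured (for example in `core-site.xml`);
+the class name, bucket name and paths are purely illustrative:
+
+    import java.net.URI;
+
+    import org.apache.hadoop.conf.Configuration;
+    import org.apache.hadoop.fs.FileStatus;
+    import org.apache.hadoop.fs.FileSystem;
+    import org.apache.hadoop.fs.Path;
+
+    public class S3AListingExample {
+      public static void main(String[] args) throws Exception {
+        // Picks up fs.s3a.* settings (credentials etc.) from the
+        // configuration files on the classpath, e.g. core-site.xml.
+        Configuration conf = new Configuration();
+        FileSystem fs = FileSystem.get(URI.create("s3a://example-bucket/"), conf);
+        try {
+          // Create a directory marker, then list the bucket root.
+          fs.mkdirs(new Path("/data"));
+          for (FileStatus status : fs.listStatus(new Path("/"))) {
+            System.out.println(status.getPath());
+          }
+        } finally {
+          fs.close();
+        }
+      }
+    }
+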
+## Testing the S3 filesystem clients
+
+To test the S3* filesystem clients, you need to provide two files
+which pass in authentication details to the test runner:
+
+1. `auth-keys.xml`
+1. `core-site.xml`
+
+These are both Hadoop XML configuration files, which must be placed into
+`hadoop-tools/hadoop-aws/src/test/resources`.
+
+
+### `auth-keys.xml`
+
+The presence of this file triggers the testing of the S3 classes.
+
+Without this file, *none of the tests in this module will be executed*.
+
+The XML file must contain all the ID/key information needed to connect
+each of the filesystem clients to the object stores, and a URL for
+each filesystem for its testing.
+
+1. `test.fs.s3n.name` : the URL of the bucket for S3n tests
+1. `test.fs.s3a.name` : the URL of the bucket for S3a tests
+1. `test.fs.s3.name` : the URL of the bucket for "S3" tests
+
+The contents of each bucket will be destroyed during the test process:
+do not use the bucket for any purpose other than testing.
+
+Example:
+
+    <configuration>
+
+      <property>
+        <name>test.fs.s3n.name</name>
+        <value>s3n://test-aws-s3n/</value>
+      </property>
+
+      <property>
+        <name>test.fs.s3a.name</name>
+        <value>s3a://test-aws-s3a/</value>
+      </property>
+
+      <property>
+        <name>test.fs.s3.name</name>
+        <value>s3://test-aws-s3/</value>
+      </property>
+
+      <property>
+        <name>fs.s3.awsAccessKeyId</name>
+        <value>DONOTPCOMMITTHISKEYTOSCM</value>
+      </property>
+
+      <property>
+        <name>fs.s3.awsSecretAccessKey</name>
+        <value>DONOTEVERSHARETHISSECRETKEY!</value>
+      </property>
+
+      <property>
+        <name>fs.s3n.awsAccessKeyId</name>
+        <value>DONOTPCOMMITTHISKEYTOSCM</value>
+      </property>
+
+      <property>
+        <name>fs.s3n.awsSecretAccessKey</name>
+        <value>DONOTEVERSHARETHISSECRETKEY!</value>
+      </property>
+
+      <property>
+        <name>fs.s3a.awsAccessKeyId</name>
+        <description>AWS access key ID. Omit for Role-based authentication.</description>
+        <value>DONOTPCOMMITTHISKEYTOSCM</value>
+      </property>
+
+      <property>
+        <name>fs.s3a.awsSecretAccessKey</name>
+        <description>AWS secret key. Omit for Role-based authentication.</description>
+        <value>DONOTEVERSHARETHISSECRETKEY!</value>
+      </property>
+
+    </configuration>
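+
+Once `auth-keys.xml` is in place, the module's tests can be run with Maven in
+the usual way, for example with `mvn test` from the `hadoop-tools/hadoop-aws`
+directory, or with `mvn test -Dtest=TestS3AFileSystemContract` to run a single
+suite. The contract tests additionally require the `contract-test-options.xml`
+file described below.
+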
+## File `contract-test-options.xml`
+
+The file `hadoop-tools/hadoop-aws/src/test/resources/contract-test-options.xml`
+must be created and configured for the test filesystems.
+
+If a specific `fs.contract.test.fs.*` test path is not defined for a
+filesystem, the contract tests against that filesystem will be skipped.
+
+The standard S3 authentication details must also be provided. This can be
+through copy-and-paste of the `auth-keys.xml` credentials, or it can be
+through direct XInclude inclusion.
+
+### s3://
+
+The filesystem name must be defined in the property `fs.contract.test.fs.s3`.
+
+
+Example:
+
+    <property>
+      <name>fs.contract.test.fs.s3</name>
+      <value>s3://test-aws-s3/</value>
+    </property>
+
+### s3n://
+
+
+In the file `src/test/resources/contract-test-options.xml`, the filesystem
+name must be defined in the property `fs.contract.test.fs.s3n`.
+The standard configuration options to define the S3N authentication details
+must also be provided.
+
+Example:
+
+    <property>
+      <name>fs.contract.test.fs.s3n</name>
+      <value>s3n://test-aws-s3n/</value>
+    </property>
+
+### s3a://
+
+
+In the file `src/test/resources/contract-test-options.xml`, the filesystem
+name must be defined in the property `fs.contract.test.fs.s3a`.
+The standard configuration options to define the S3A authentication details
+must also be provided.
+
+Example:
+
+    <property>
+      <name>fs.contract.test.fs.s3a</name>
+      <value>s3a://test-aws-s3a/</value>
+    </property>
+
+### Complete example of `contract-test-options.xml`
+
+    <configuration>
+
+      <include xmlns="http://www.w3.org/2001/XInclude"
+        href="auth-keys.xml"/>
+
+      <property>
+        <name>fs.contract.test.fs.s3</name>
+        <value>s3://test-aws-s3/</value>
+      </property>
+
+      <property>
+        <name>fs.contract.test.fs.s3a</name>
+        <value>s3a://test-aws-s3a/</value>
+      </property>
+
+      <property>
+        <name>fs.contract.test.fs.s3n</name>
+        <value>s3n://test-aws-s3n/</value>
+      </property>
+
+    </configuration>
+
+This example pulls in the `auth-keys.xml` file for the credentials.
+This provides one single place to keep the keys up to date —and means
+that the file `contract-test-options.xml` does not contain any
+secret credentials itself.
\ No newline at end of file
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/TestS3AContractRename.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/TestS3AContractRename.java
index 88ed6d6..af1ed37 100644
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/TestS3AContractRename.java
+++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/TestS3AContractRename.java
@@ -21,10 +21,10 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.contract.AbstractContractRenameTest;
 import org.apache.hadoop.fs.contract.AbstractFSContract;
-import org.apache.hadoop.fs.contract.AbstractFSContractTestBase;
-import org.apache.hadoop.fs.contract.ContractTestUtils;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.junit.Test;
 
 import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
 import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
@@ -51,14 +51,11 @@ public void testRenameDirIntoExistingDir() throws Throwable {
     Path destFilePath = new Path(destDir, "dest-512.txt");
     byte[] destDateset = dataset(512, 'A', 'Z');
-    writeDataset(fs, destFilePath, destDateset, destDateset.length, 1024, false);
+    writeDataset(fs, destFilePath, destDateset, destDateset.length, 1024,
+        false);
     assertIsFile(destFilePath);
 
     boolean rename = fs.rename(srcDir, destDir);
-    Path renamedSrcFilePath = new Path(destDir, "source-256.txt");
-    assertIsFile(destFilePath);
-    assertIsFile(renamedSrcFilePath);
-    ContractTestUtils.verifyFileContents(fs, destFilePath, destDateset);
-    assertTrue("rename returned false though the contents were copied", rename);
+    assertFalse("s3a doesn't support rename to non-empty directory", rename);
   }
 }
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/Jets3tS3FileSystemContractTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/Jets3tS3FileSystemContractTest.java
deleted file mode 100644
index 53b3c03..0000000
--- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/Jets3tS3FileSystemContractTest.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3; - -import java.io.IOException; - -public class Jets3tS3FileSystemContractTest - extends S3FileSystemContractBaseTest { - - @Override - FileSystemStore getFileSystemStore() throws IOException { - return new Jets3tFileSystemStore(); - } - -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java index 28b0507..de106f8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/S3FileSystemContractBaseTest.java @@ -21,13 +21,15 @@ import java.io.IOException; import java.net.URI; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystemContractBaseTest; -import org.apache.hadoop.fs.Path; +import org.junit.internal.AssumptionViolatedException; public abstract class S3FileSystemContractBaseTest extends FileSystemContractBaseTest { + public static final String KEY_TEST_FS = "test.fs.s3.name"; private FileSystemStore store; abstract FileSystemStore getFileSystemStore() throws IOException; @@ -37,7 +39,12 @@ protected void setUp() throws Exception { Configuration conf = new Configuration(); store = getFileSystemStore(); fs = new S3FileSystem(store); - fs.initialize(URI.create(conf.get("test.fs.s3.name")), conf); + String fsname = conf.get(KEY_TEST_FS); + if (StringUtils.isEmpty(fsname)) { + throw new AssumptionViolatedException( + "No test FS defined in :" + KEY_TEST_FS); + } + fs.initialize(URI.create(fsname), conf); } @Override diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/TestS3FileSystemContract.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/TestS3FileSystemContract.java new file mode 100644 index 0000000..449fb39 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3/TestS3FileSystemContract.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3; + +import java.io.IOException; + +public class TestS3FileSystemContract + extends S3FileSystemContractBaseTest { + + @Override + FileSystemStore getFileSystemStore() throws IOException { + return new Jets3tFileSystemStore(); + } + +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3AFileSystemContractBaseTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3AFileSystemContractBaseTest.java deleted file mode 100644 index 8455233..0000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3AFileSystemContractBaseTest.java +++ /dev/null @@ -1,327 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a; - -import static org.junit.Assume.*; - -import org.junit.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileSystemContractBaseTest; -import org.apache.hadoop.fs.Path; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.URI; -import java.util.UUID; - -/** - * Tests a live S3 system. If you keys and bucket aren't specified, all tests - * are marked as passed - * - * This uses BlockJUnit4ClassRunner because FileSystemContractBaseTest from - * TestCase which uses the old Junit3 runner that doesn't ignore assumptions - * properly making it impossible to skip the tests if we don't have a valid - * bucket. 
- **/ -public class S3AFileSystemContractBaseTest extends FileSystemContractBaseTest { - private static final int TEST_BUFFER_SIZE = 128; - private static final int MODULUS = 128; - - protected static final Logger LOG = LoggerFactory.getLogger(S3AFileSystemContractBaseTest.class); - - @Override - public void setUp() throws Exception { - Configuration conf = new Configuration(); - - URI testURI = URI.create(conf.get("test.fs.s3a.name")); - - boolean liveTest = testURI != null && !testURI.equals("s3a:///"); - - // This doesn't work with our JUnit 3 style test cases, so instead we'll - // make this whole class not run by default - assumeTrue(liveTest); - - fs = new S3AFileSystem(); - fs.initialize(testURI, conf); - super.setUp(); - } - - @Override - protected void tearDown() throws Exception { - if (fs != null) { - fs.delete(path("/tests3a"), true); - } - super.tearDown(); - } - - @Test(timeout = 10000) - public void testMkdirs() throws IOException { - // No trailing slash - assertTrue(fs.mkdirs(path("/tests3a/a"))); - assertTrue(fs.exists(path("/tests3a/a"))); - - // With trailing slash - assertTrue(fs.mkdirs(path("/tests3a/b/"))); - assertTrue(fs.exists(path("/tests3a/b/"))); - - // Two levels deep - assertTrue(fs.mkdirs(path("/tests3a/c/a/"))); - assertTrue(fs.exists(path("/tests3a/c/a/"))); - - // Mismatched slashes - assertTrue(fs.exists(path("/tests3a/c/a"))); - } - - - @Test(timeout=20000) - public void testDelete() throws IOException { - // Test deleting an empty directory - assertTrue(fs.mkdirs(path("/tests3a/d"))); - assertTrue(fs.delete(path("/tests3a/d"), true)); - assertFalse(fs.exists(path("/tests3a/d"))); - - // Test deleting a deep empty directory - assertTrue(fs.mkdirs(path("/tests3a/e/f/g/h"))); - assertTrue(fs.delete(path("/tests3a/e/f/g"), true)); - assertFalse(fs.exists(path("/tests3a/e/f/g/h"))); - assertFalse(fs.exists(path("/tests3a/e/f/g"))); - assertTrue(fs.exists(path("/tests3a/e/f"))); - - // Test delete of just a file - writeFile(path("/tests3a/f/f/file"), 1000); - assertTrue(fs.exists(path("/tests3a/f/f/file"))); - assertTrue(fs.delete(path("/tests3a/f/f/file"), false)); - assertFalse(fs.exists(path("/tests3a/f/f/file"))); - - - // Test delete of a path with files in various directories - writeFile(path("/tests3a/g/h/i/file"), 1000); - assertTrue(fs.exists(path("/tests3a/g/h/i/file"))); - writeFile(path("/tests3a/g/h/j/file"), 1000); - assertTrue(fs.exists(path("/tests3a/g/h/j/file"))); - try { - assertFalse(fs.delete(path("/tests3a/g/h"), false)); - fail("Expected delete to fail with recursion turned off"); - } catch (IOException e) {} - assertTrue(fs.exists(path("/tests3a/g/h/j/file"))); - assertTrue(fs.delete(path("/tests3a/g/h"), true)); - assertFalse(fs.exists(path("/tests3a/g/h/j"))); - } - - - @Test(timeout = 3600000) - public void testOpenCreate() throws IOException { - try { - createAndReadFileTest(1024); - } catch (IOException e) { - fail(e.getMessage()); - } - - try { - createAndReadFileTest(5 * 1024 * 1024); - } catch (IOException e) { - fail(e.getMessage()); - } - - try { - createAndReadFileTest(20 * 1024 * 1024); - } catch (IOException e) { - fail(e.getMessage()); - } - - /* - Enable to test the multipart upload - try { - createAndReadFileTest((long)6 * 1024 * 1024 * 1024); - } catch (IOException e) { - fail(e.getMessage()); - } - */ - } - - @Test(timeout = 1200000) - public void testRenameFile() throws IOException { - Path srcPath = path("/tests3a/a/srcfile"); - - final OutputStream outputStream = fs.create(srcPath, false); - 
generateTestData(outputStream, 11 * 1024 * 1024); - outputStream.close(); - - assertTrue(fs.exists(srcPath)); - - Path dstPath = path("/tests3a/b/dstfile"); - - assertFalse(fs.rename(srcPath, dstPath)); - assertTrue(fs.mkdirs(dstPath.getParent())); - assertTrue(fs.rename(srcPath, dstPath)); - assertTrue(fs.exists(dstPath)); - assertFalse(fs.exists(srcPath)); - assertTrue(fs.exists(srcPath.getParent())); - } - - - @Test(timeout = 10000) - public void testRenameDirectory() throws IOException { - Path srcPath = path("/tests3a/a"); - - assertTrue(fs.mkdirs(srcPath)); - writeFile(new Path(srcPath, "b/testfile"), 1024); - - Path nonEmptyPath = path("/tests3a/nonempty"); - writeFile(new Path(nonEmptyPath, "b/testfile"), 1024); - - assertFalse(fs.rename(srcPath, nonEmptyPath)); - - Path dstPath = path("/tests3a/b"); - assertTrue(fs.rename(srcPath, dstPath)); - assertFalse(fs.exists(srcPath)); - assertTrue(fs.exists(new Path(dstPath, "b/testfile"))); - } - - - @Test(timeout=10000) - public void testSeek() throws IOException { - Path path = path("/tests3a/testfile.seek"); - writeFile(path, TEST_BUFFER_SIZE * 10); - - - FSDataInputStream inputStream = fs.open(path, TEST_BUFFER_SIZE); - inputStream.seek(inputStream.getPos() + MODULUS); - - testReceivedData(inputStream, TEST_BUFFER_SIZE * 10 - MODULUS); - } - - /** - * Creates and reads a file with the given size in S3. The test file is - * generated according to a specific pattern. - * During the read phase the incoming data stream is also checked against this pattern. - * - * @param fileSize - * the size of the file to be generated in bytes - * @throws IOException - * thrown if an I/O error occurs while writing or reading the test file - */ - private void createAndReadFileTest(final long fileSize) throws IOException { - final String objectName = UUID.randomUUID().toString(); - final Path objectPath = new Path("/tests3a/", objectName); - - // Write test file to S3 - final OutputStream outputStream = fs.create(objectPath, false); - generateTestData(outputStream, fileSize); - outputStream.close(); - - // Now read the same file back from S3 - final InputStream inputStream = fs.open(objectPath); - testReceivedData(inputStream, fileSize); - inputStream.close(); - - // Delete test file - fs.delete(objectPath, false); - } - - - /** - * Receives test data from the given input stream and checks the size of the - * data as well as the pattern inside the received data. 
- * - * @param inputStream - * the input stream to read the test data from - * @param expectedSize - * the expected size of the data to be read from the input stream in bytes - * @throws IOException - * thrown if an error occurs while reading the data - */ - private void testReceivedData(final InputStream inputStream, - final long expectedSize) throws IOException { - final byte[] testBuffer = new byte[TEST_BUFFER_SIZE]; - - long totalBytesRead = 0; - int nextExpectedNumber = 0; - while (true) { - final int bytesRead = inputStream.read(testBuffer); - if (bytesRead < 0) { - break; - } - - totalBytesRead += bytesRead; - - for (int i = 0; i < bytesRead; ++i) { - if (testBuffer[i] != nextExpectedNumber) { - throw new IOException("Read number " + testBuffer[i] + " but expected " - + nextExpectedNumber); - } - - ++nextExpectedNumber; - - if (nextExpectedNumber == MODULUS) { - nextExpectedNumber = 0; - } - } - } - - if (totalBytesRead != expectedSize) { - throw new IOException("Expected to read " + expectedSize + - " bytes but only received " + totalBytesRead); - } - } - - - /** - * Generates test data of the given size according to some specific pattern - * and writes it to the provided output stream. - * - * @param outputStream - * the output stream to write the data to - * @param size - * the size of the test data to be generated in bytes - * @throws IOException - * thrown if an error occurs while writing the data - */ - private void generateTestData(final OutputStream outputStream, - final long size) throws IOException { - - final byte[] testBuffer = new byte[TEST_BUFFER_SIZE]; - for (int i = 0; i < testBuffer.length; ++i) { - testBuffer[i] = (byte) (i % MODULUS); - } - - long bytesWritten = 0; - while (bytesWritten < size) { - - final long diff = size - bytesWritten; - if (diff < testBuffer.length) { - outputStream.write(testBuffer, 0, (int)diff); - bytesWritten += diff; - } else { - outputStream.write(testBuffer); - bytesWritten += testBuffer.length; - } - } - } - - private void writeFile(Path name, int fileSize) throws IOException { - final OutputStream outputStream = fs.create(name, false); - generateTestData(outputStream, fileSize); - outputStream.close(); - } -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java new file mode 100644 index 0000000..514647c --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.junit.internal.AssumptionViolatedException; + +import java.io.IOException; +import java.net.URI; + +public class S3ATestUtils { + + public static S3AFileSystem createTestFileSystem(Configuration conf) throws + IOException { + String fsname = conf.getTrimmed(TestS3AFileSystemContract.TEST_FS_S3A_NAME, ""); + + + boolean liveTest = !StringUtils.isEmpty(fsname); + URI testURI = null; + if (liveTest) { + testURI = URI.create(fsname); + liveTest = testURI.getScheme().equals(Constants.FS_S3A); + } + if (!liveTest) { + // This doesn't work with our JUnit 3 style test cases, so instead we'll + // make this whole class not run by default + throw new AssumptionViolatedException( + "No test filesystem in " + TestS3AFileSystemContract.TEST_FS_S3A_NAME); + } + S3AFileSystem fs1 = new S3AFileSystem(); + fs1.initialize(testURI, conf); + return fs1; + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileSystemContract.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileSystemContract.java new file mode 100644 index 0000000..5c88358 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AFileSystemContract.java @@ -0,0 +1,105 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystemContractBaseTest; +import org.apache.hadoop.fs.Path; + +/** + * Tests a live S3 system. If your keys and bucket aren't specified, all tests + * are marked as passed. + * + * This uses BlockJUnit4ClassRunner because FileSystemContractBaseTest from + * TestCase which uses the old Junit3 runner that doesn't ignore assumptions + * properly making it impossible to skip the tests if we don't have a valid + * bucket. 
+ **/ +public class TestS3AFileSystemContract extends FileSystemContractBaseTest { + + protected static final Logger LOG = + LoggerFactory.getLogger(TestS3AFileSystemContract.class); + public static final String TEST_FS_S3A_NAME = "test.fs.s3a.name"; + + @Override + public void setUp() throws Exception { + Configuration conf = new Configuration(); + + fs = S3ATestUtils.createTestFileSystem(conf); + super.setUp(); + } + + @Override + protected void tearDown() throws Exception { + if (fs != null) { + fs.delete(path("test"), true); + } + super.tearDown(); + } + + @Override + public void testMkdirsWithUmask() throws Exception { + // not supported + } + + @Override + public void testRenameFileAsExistingFile() throws Exception { + if (!renameSupported()) return; + + Path src = path("/test/hadoop/file"); + createFile(src); + Path dst = path("/test/new/newfile"); + createFile(dst); + // s3 doesn't support rename option + // rename-overwrites-dest is always allowed. + rename(src, dst, true, false, true); + } + + @Override + public void testRenameDirectoryAsExistingDirectory() throws Exception { + if (!renameSupported()) { + return; + } + + Path src = path("/test/hadoop/dir"); + fs.mkdirs(src); + createFile(path("/test/hadoop/dir/file1")); + createFile(path("/test/hadoop/dir/subdir/file2")); + + Path dst = path("/test/new/newdir"); + fs.mkdirs(dst); + rename(src, dst, true, false, true); + assertFalse("Nested file1 exists", + fs.exists(path("/test/hadoop/dir/file1"))); + assertFalse("Nested file2 exists", + fs.exists(path("/test/hadoop/dir/subdir/file2"))); + assertTrue("Renamed nested file1 exists", + fs.exists(path("/test/new/newdir/file1"))); + assertTrue("Renamed nested exists", + fs.exists(path("/test/new/newdir/subdir/file2"))); + } + +// @Override + public void testMoveDirUnderParent() throws Throwable { + // not support because + // Fails if dst is a directory that is not empty. + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java new file mode 100644 index 0000000..e0cbc92 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.scale; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.Path; + +import org.apache.hadoop.fs.s3a.S3ATestUtils; +import org.junit.After; +import org.junit.Before; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URI; + +import static org.junit.Assume.assumeTrue; + +/** + * Base class for scale tests; here is where the common scale configuration + * keys are defined + */ +public class S3AScaleTestBase { + + public static final String SCALE_TEST = "scale.test."; + public static final String KEY_OPERATION_COUNT = + SCALE_TEST + "operation.count"; + public static final long DEFAULT_OPERATION_COUNT = 2005; + + protected S3AFileSystem fs; + private static final Logger LOG = + LoggerFactory.getLogger(S3AScaleTestBase.class); + + private Configuration conf; + + /** + * Configuration generator. May be overridden to inject + * some custom options + * @return a configuration with which to create FS instances + */ + protected Configuration createConfiguration() { + return new Configuration(); + } + + /** + * Get the configuration used to set up the FS + * @return the configuration + */ + public Configuration getConf() { + return conf; + } + + @Before + public void setUp() throws Exception { + conf = createConfiguration(); + fs = S3ATestUtils.createTestFileSystem(conf); + } + + @After + public void tearDown() throws Exception { + ContractTestUtils.rm(fs, getTestPath(), true, true); + } + + protected Path getTestPath() { + return new Path("/tests3a"); + } + + protected long getOperationCount() { + return getConf().getLong(KEY_OPERATION_COUNT, DEFAULT_OPERATION_COUNT); + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java new file mode 100644 index 0000000..c913a67 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/TestS3ADeleteManyFiles.java @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.scale; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.Timeout; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; + +import static org.junit.Assert.assertEquals; + +public class TestS3ADeleteManyFiles extends S3AScaleTestBase { + private static final Logger LOG = + LoggerFactory.getLogger(TestS3ADeleteManyFiles.class); + + + @Rule + public Timeout testTimeout = new Timeout(30 * 60 * 1000); + + @Test + public void testBulkRenameAndDelete() throws Throwable { + final Path scaleTestDir = getTestPath(); + final Path srcDir = new Path(scaleTestDir, "src"); + final Path finalDir = new Path(scaleTestDir, "final"); + final long count = getOperationCount(); + ContractTestUtils.rm(fs, scaleTestDir, true, false); + + fs.mkdirs(srcDir); + fs.mkdirs(finalDir); + + int testBufferSize = fs.getConf() + .getInt(ContractTestUtils.IO_CHUNK_BUFFER_SIZE, + ContractTestUtils.DEFAULT_IO_CHUNK_BUFFER_SIZE); + // use Executor to speed up file creation + ExecutorService exec = Executors.newFixedThreadPool(16); + final ExecutorCompletionService completionService = + new ExecutorCompletionService(exec); + try { + final byte[] data = ContractTestUtils.dataset(testBufferSize, 'a', 'z'); + + for (int i = 0; i < count; ++i) { + final String fileName = "foo-" + i; + completionService.submit(new Callable() { + @Override + public Boolean call() throws IOException { + ContractTestUtils.createFile(fs, new Path(srcDir, fileName), + false, data); + return fs.exists(new Path(srcDir, fileName)); + } + }); + } + for (int i = 0; i < count; ++i) { + final Future future = completionService.take(); + try { + if (!future.get()) { + LOG.warn("cannot create file"); + } + } catch (ExecutionException e) { + LOG.warn("Error while uploading file", e.getCause()); + throw e; + } + } + } finally { + exec.shutdown(); + } + + int nSrcFiles = fs.listStatus(srcDir).length; + fs.rename(srcDir, finalDir); + assertEquals(nSrcFiles, fs.listStatus(finalDir).length); + ContractTestUtils.assertPathDoesNotExist(fs, "not deleted after rename", + new Path(srcDir, "foo-" + 0)); + ContractTestUtils.assertPathDoesNotExist(fs, "not deleted after rename", + new Path(srcDir, "foo-" + count / 2)); + ContractTestUtils.assertPathDoesNotExist(fs, "not deleted after rename", + new Path(srcDir, "foo-" + (count - 1))); + ContractTestUtils.assertPathExists(fs, "not renamed to dest dir", + new Path(finalDir, "foo-" + 0)); + ContractTestUtils.assertPathExists(fs, "not renamed to dest dir", + new Path(finalDir, "foo-" + count/2)); + ContractTestUtils.assertPathExists(fs, "not renamed to dest dir", + new Path(finalDir, "foo-" + (count-1))); + + ContractTestUtils.assertDeleted(fs, finalDir, true, false); + } + + @Test + public void testOpenCreate() throws IOException { + Path dir = new Path("/tests3a"); + ContractTestUtils.createAndVerifyFile(fs, dir, 1024); + ContractTestUtils.createAndVerifyFile(fs, dir, 5 * 1024 * 1024); + ContractTestUtils.createAndVerifyFile(fs, dir, 20 * 1024 * 1024); + + + /* + Enable to test the multipart upload + try { + ContractTestUtils.createAndVerifyFile(fs, dir, + (long)6 * 1024 * 1024 * 
1024); + } catch (IOException e) { + fail(e.getMessage()); + } + */ + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/Jets3tNativeS3FileSystemContractTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/Jets3tNativeS3FileSystemContractTest.java deleted file mode 100644 index 6516c83..0000000 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/Jets3tNativeS3FileSystemContractTest.java +++ /dev/null @@ -1,30 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3native; - -import java.io.IOException; - -public class Jets3tNativeS3FileSystemContractTest - extends NativeS3FileSystemContractBaseTest { - - @Override - NativeFileSystemStore getNativeFileSystemStore() throws IOException { - return new Jets3tNativeFileSystemStore(); - } -} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java index ac6b9ec..f215219 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/NativeS3FileSystemContractBaseTest.java @@ -22,15 +22,17 @@ import java.io.InputStream; import java.net.URI; +import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystemContractBaseTest; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3native.NativeS3FileSystem.NativeS3FsInputStream; +import org.junit.internal.AssumptionViolatedException; public abstract class NativeS3FileSystemContractBaseTest extends FileSystemContractBaseTest { - + public static final String KEY_TEST_FS = "test.fs.s3n.name"; private NativeFileSystemStore store; abstract NativeFileSystemStore getNativeFileSystemStore() throws IOException; @@ -40,7 +42,12 @@ protected void setUp() throws Exception { Configuration conf = new Configuration(); store = getNativeFileSystemStore(); fs = new NativeS3FileSystem(store); - fs.initialize(URI.create(conf.get("test.fs.s3n.name")), conf); + String fsname = conf.get(KEY_TEST_FS); + if (StringUtils.isEmpty(fsname)) { + throw new AssumptionViolatedException( + "No test FS defined in :" + KEY_TEST_FS); + } + fs.initialize(URI.create(fsname), conf); } @Override diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestJets3tNativeS3FileSystemContract.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestJets3tNativeS3FileSystemContract.java new file mode 100644 index 
0000000..b645f93 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3native/TestJets3tNativeS3FileSystemContract.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3native; + +import java.io.IOException; + +public class TestJets3tNativeS3FileSystemContract + extends NativeS3FileSystemContractBaseTest { + + @Override + NativeFileSystemStore getNativeFileSystemStore() throws IOException { + return new Jets3tNativeFileSystemStore(); + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/resources/contract/s3a.xml b/hadoop-tools/hadoop-aws/src/test/resources/contract/s3a.xml index 4142471..4f9c081 100644 --- a/hadoop-tools/hadoop-aws/src/test/resources/contract/s3a.xml +++ b/hadoop-tools/hadoop-aws/src/test/resources/contract/s3a.xml @@ -48,6 +48,11 @@ + fs.contract.rename-remove-dest-if-empty-dir + true + + + fs.contract.supports-append false diff --git a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml new file mode 100644 index 0000000..3397769 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml @@ -0,0 +1,51 @@ + + + + + + + + + + hadoop.tmp.dir + target/build/test + A base for other temporary directories. + true + + + + + hadoop.security.authentication + simple + + + + + + + + diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 1b9d915..97b44bf 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -597,15 +597,6 @@ Release 2.6.0 - UNRELEASED YARN-2671. Fixed ApplicationSubmissionContext to still set resource for backward compatibility. (Wangda Tan via zjshen) - YARN-2667. Fix the release audit warning caused by hadoop-yarn-registry - (Yi Liu via jlowe) - - YARN-2651. Spun off LogRollingInterval from LogAggregationContext. (Xuan Gong - via zjshen) - - YARN-2377. Localization exception stack traces are not passed as - diagnostic info (Gera Shegalov via jlowe) - Release 2.5.1 - 2014-09-05 INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/LogAggregationContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/LogAggregationContext.java index 46c1809..9a0a157 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/LogAggregationContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/LogAggregationContext.java @@ -37,6 +37,13 @@ * which match the defined exclude pattern and those log files * will not be uploaded. 
If the log file name matches both the * include and the exclude pattern, this file will be excluded eventually + *
  • rollingIntervalSeconds. The default value is -1. By default, + * the logAggregationService only uploads container logs when + * the application is finished. This configuration defines + * how often the logAggregationService uploads container logs, in seconds. + * By setting this configuration, the logAggregationService can upload container + * logs periodically while the application is running. + *
  • * *
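Illustrative aside (not part of the patch): the hunk just below widens `newInstance` to carry the new field. A hedged sketch of how a caller might use the extended factory method once this change is in place — the pattern strings and the one-hour interval are arbitrary example values:

    import org.apache.hadoop.yarn.api.records.LogAggregationContext;

    // Illustrative only: ask for matching container logs to be uploaded roughly
    // every hour while the application runs; -1 would keep the default
    // "upload when the application finishes" behaviour described above.
    LogAggregationContext logContext = LogAggregationContext.newInstance(
        "std.*",      // includePattern -- example value
        ".*\\.tmp",   // excludePattern -- example value
        3600L);       // rollingIntervalSeconds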

    * @@ -50,10 +57,11 @@ @Public @Unstable public static LogAggregationContext newInstance(String includePattern, - String excludePattern) { + String excludePattern, long rollingIntervalSeconds) { LogAggregationContext context = Records.newRecord(LogAggregationContext.class); context.setIncludePattern(includePattern); context.setExcludePattern(excludePattern); + context.setRollingIntervalSeconds(rollingIntervalSeconds); return context; } @@ -92,4 +100,22 @@ public static LogAggregationContext newInstance(String includePattern, @Public @Unstable public abstract void setExcludePattern(String excludePattern); + + /** + * Get rollingIntervalSeconds + * + * @return the rollingIntervalSeconds + */ + @Public + @Unstable + public abstract long getRollingIntervalSeconds(); + + /** + * Set rollingIntervalSeconds + * + * @param rollingIntervalSeconds + */ + @Public + @Unstable + public abstract void setRollingIntervalSeconds(long rollingIntervalSeconds); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/SerializedException.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/SerializedException.java index 9355a23..aba54d4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/SerializedException.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/SerializedException.java @@ -90,22 +90,4 @@ public static SerializedException newInstance(Throwable e) { @Private @Unstable public abstract Throwable deSerialize(); - - private void stringify(StringBuilder sb) { - sb.append(getMessage()) - .append("\n") - .append(getRemoteTrace()); - final SerializedException cause = getCause(); - if (cause != null) { - sb.append("Caused by: "); - cause.stringify(sb); - } - } - - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(128); - stringify(sb); - return sb.toString(); - } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index dd7fa89..e2fd8dd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -712,13 +712,6 @@ public static final long DEFAULT_NM_LOG_RETAIN_SECONDS = 3 * 60 * 60; /** - * Define how often NMs wake up and upload log files - */ - public static final String NM_LOG_AGGREGATION_ROLL_MONITORING_INTERVAL_SECONDS = - NM_PREFIX + "log-aggregation.roll-monitoring-interval-seconds"; - public static final long - DEFAULT_NM_LOG_AGGREGATION_ROLL_MONITORING_INTERVAL_SECONDS = -1; - /** * Number of threads used in log cleanup. 
Only applicable if Log aggregation * is disabled */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index 136192d..8db451d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -302,6 +302,7 @@ message ApplicationSubmissionContextProto { message LogAggregationContextProto { optional string include_pattern = 1 [default = ".*"]; optional string exclude_pattern = 2 [default = ""]; + optional int64 rolling_interval_seconds = 3 [default = -1]; } enum ApplicationAccessTypeProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/LogAggregationContextPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/LogAggregationContextPBImpl.java index dc7a21d..4406ef9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/LogAggregationContextPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/LogAggregationContextPBImpl.java @@ -116,4 +116,19 @@ public void setExcludePattern(String excludePattern) { } builder.setExcludePattern(excludePattern); } + + @Override + public long getRollingIntervalSeconds() { + LogAggregationContextProtoOrBuilder p = viaProto ? proto : builder; + if (! p.hasRollingIntervalSeconds()) { + return -1; + } + return p.getRollingIntervalSeconds(); + } + + @Override + public void setRollingIntervalSeconds(long rollingIntervalSeconds) { + maybeInitBuilder(); + builder.setRollingIntervalSeconds(rollingIntervalSeconds); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 13059f4..572f85c97 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -1412,6 +1412,7 @@ + The root zookeeper node for the registry hadoop.registry.zk.root /registry @@ -1427,7 +1428,7 @@ - Zookeeper session timeout in milliseconds + Zookeeper connection timeout in milliseconds hadoop.registry.zk.connection.timeout.ms 15000 @@ -1522,14 +1523,4 @@ Client - - Defines how often NMs wake up to upload log files. - The default value is -1. By default, the logs will be uploaded when - the application is finished. By setting this configure, logs can be uploaded - periodically when the application is running. The minimum rolling-interval-seconds - can be set is 3600. - - yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds - -1 -
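Taken together, the hunks above replace the NM-wide `yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds` setting with a per-application field on `LogAggregationContext`. A hedged sketch of the client side follows; it assumes `ApplicationSubmissionContext#setLogAggregationContext` is available, which this patch does not itself show:

    import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext;
    import org.apache.hadoop.yarn.api.records.LogAggregationContext;

    // Hedged sketch, not part of the patch: attach a context like the one
    // sketched earlier so one application opts in to periodic log upload.
    void requestRollingLogAggregation(ApplicationSubmissionContext appContext,
        LogAggregationContext logContext) {
      // Assumed setter; the rest of the submission flow is unchanged.
      appContext.setLogAggregationContext(logContext);
    }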
    diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml index 04569db..ed80c42 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml @@ -130,7 +130,6 @@ apache-rat-plugin - src/main/resources/.keep diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java index d3b33e8..c2dcebf 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/ResourceLocalizationService.java @@ -1008,12 +1008,12 @@ LocalizerHeartbeatResponse update( response.setLocalizerAction(LocalizerAction.LIVE); break; case FETCH_FAILURE: - final String diagnostics = stat.getException().toString(); - LOG.warn(req + " failed: " + diagnostics); + LOG.info("DEBUG: FAILED " + req + + ", " + stat.getException().getMessage()); response.setLocalizerAction(LocalizerAction.DIE); getLocalResourcesTracker(req.getVisibility(), user, applicationId) .handle(new ResourceFailedLocalizationEvent( - req, diagnostics)); + req, stat.getException().getMessage())); // unlocking the resource and removing it from scheduled resource // list diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java index 98e3caf..63f7c66 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/AppLogAggregatorImpl.java @@ -148,11 +148,9 @@ public AppLogAggregatorImpl(Dispatcher dispatcher, } else { this.retentionSize = configuredRentionSize; } - long configuredRollingMonitorInterval = conf.getLong( - YarnConfiguration - .NM_LOG_AGGREGATION_ROLL_MONITORING_INTERVAL_SECONDS, - YarnConfiguration - .DEFAULT_NM_LOG_AGGREGATION_ROLL_MONITORING_INTERVAL_SECONDS); + long configuredRollingMonitorInterval = + this.logAggregationContext == null ? 
-1 : this.logAggregationContext + .getRollingIntervalSeconds(); boolean debug_mode = conf.getBoolean(NM_LOG_AGGREGATION_DEBUG_ENABLED, DEFAULT_NM_LOG_AGGREGATION_DEBUG_ENABLED); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java index 7850a1c..2c69843 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/TestContainerManagerRecovery.java @@ -130,7 +130,8 @@ public void testApplicationRecovery() throws Exception { containerTokens, acls); // create the logAggregationContext LogAggregationContext logAggregationContext = - LogAggregationContext.newInstance("includePattern", "excludePattern"); + LogAggregationContext.newInstance("includePattern", "excludePattern", + 1000); StartContainersResponse startResponse = startContainer(context, cm, cid, clc, logAggregationContext); assertTrue(startResponse.getFailedRequests().isEmpty()); @@ -167,6 +168,8 @@ public void testApplicationRecovery() throws Exception { LogAggregationContext recovered = ((ApplicationImpl) app).getLogAggregationContext(); assertNotNull(recovered); + assertEquals(logAggregationContext.getRollingIntervalSeconds(), + recovered.getRollingIntervalSeconds()); assertEquals(logAggregationContext.getIncludePattern(), recovered.getIncludePattern()); assertEquals(logAggregationContext.getExcludePattern(), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java index ab86a18..2c0f349 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/logaggregation/TestLogAggregationService.java @@ -1235,12 +1235,10 @@ private void testLogAggregationService(boolean retentionSizeLimitation) throws Exception { LogAggregationContext logAggregationContextWithInterval = Records.newRecord(LogAggregationContext.class); + logAggregationContextWithInterval.setRollingIntervalSeconds(5000); this.conf.set(YarnConfiguration.NM_LOG_DIRS, localLogDir.getAbsolutePath()); this.conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, this.remoteRootLogDir.getAbsolutePath()); - this.conf.setLong( - YarnConfiguration.NM_LOG_AGGREGATION_ROLL_MONITORING_INTERVAL_SECONDS, - 3600); if (retentionSizeLimitation) { // set the retention size as 1. The number of logs for one application // in one NM should be 1. 
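A note on the record's default, grounded in the `LogAggregationContextPBImpl` hunk earlier: when the field is never set, the getter falls back to -1, i.e. logs are only uploaded when the application completes, which is also what `AppLogAggregatorImpl` now assumes when no context is supplied. An illustrative check (not part of the patch):

    import org.apache.hadoop.yarn.api.records.LogAggregationContext;
    import org.apache.hadoop.yarn.util.Records;

    // Illustrative only: a freshly created record reports the -1 default,
    // matching LogAggregationContextPBImpl#getRollingIntervalSeconds above.
    LogAggregationContext ctx = Records.newRecord(LogAggregationContext.class);
    assert ctx.getRollingIntervalSeconds() == -1;
    ctx.setRollingIntervalSeconds(5000);
    assert ctx.getRollingIntervalSeconds() == 5000;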
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java index 365c754..85ef381 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/TestContainerAllocation.java @@ -212,14 +212,16 @@ public void testLogAggregationContextPassedIntoContainerToken() .assertNull(getLogAggregationContextFromContainerToken(rm1, nm1, null)); // create a not-null LogAggregationContext + final int interval = 2000; LogAggregationContext logAggregationContext = LogAggregationContext.newInstance( - "includePattern", "excludePattern"); + "includePattern", "excludePattern", interval); LogAggregationContext returned = getLogAggregationContextFromContainerToken(rm1, nm2, logAggregationContext); Assert.assertEquals("includePattern", returned.getIncludePattern()); Assert.assertEquals("excludePattern", returned.getExcludePattern()); + Assert.assertEquals(interval, returned.getRollingIntervalSeconds()); rm1.stop(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/registry/index.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/registry/index.md index a9ea24f..3a648b6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/registry/index.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/registry/index.md @@ -24,5 +24,6 @@ and use the binding information to connect with the services's network-accessibl endpoints, be they REST, IPC, Web UI, Zookeeper quorum+path or some other protocol. * [Architecture](yarn-registry.html) +* [Configuration](registry-configuration.html) * [Using the YARN Service registry](using-the-yarn-service-registry.html) * [Security](registry-security.html) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/registry/registry-configuration.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/registry/registry-configuration.md new file mode 100644 index 0000000..5c31d99 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/registry/registry-configuration.md @@ -0,0 +1,366 @@ + + +# Registry Configuration + +## Core Settings + + +### Enabling the Registry in the Resource Manager + +The Resource Manager manages user directory creation and record cleanup +on YARN container/application attempt/application completion. + + + + + + Is the registry enabled: does the RM start it up, + create the user and system paths, and purge + service records when containers, application attempts + and applications complete + + hadoop.registry.rm.enabled + false + + +### Setting the Zookeeper Quorum: `hadoop.registry.zk.quorum` + +This is an essential setting: it identifies the lists of zookeeper hosts +and the ports on which the ZK services are listening. 
+ + + + + + List of hostname:port pairs defining the + zookeeper quorum binding for the registry + + hadoop.registry.zk.quorum + localhost:2181 + + +It takes a comma-separated list, such as `zk1:2181,zk2:2181,zk3:2181` + +### Setting the Zookeeper Registry Base path: `hadoop.registry.zk.root` + +This path sets the base zookeeper node for the registry + + + + The root zookeeper node for the registry + + hadoop.registry.zk.root + /registry + + +The default value is normally sufficient. + +## Security Options + +Registry security is enabled when the property `hadoop.registry.secure` +is set to `true`. Once set, nodes are created with permissions, so that +only a specific user *and the configured cluster "superuser" accounts* +can write under their home path of `/users`. Only the superuser accounts +will be able to manipulate the root path, including `/services` and `/users`. + +All write operations on the registry (including deleting entries and paths) +must be authenticated. Read operations are still permitted by unauthenticated +callers. + +The key settings for secure registry support are: + +* enabling the secure mode: `hadoop.registry.secure`. +* listing the superuser zookeeper ACLs: `hadoop.registry.system.acls`. +* listing the kerberos realm for the principals: `hadoop.registry.kerberos.realm` +* identifying the JAAS context within the JAAS configuration which defines +the user: `hadoop.registry.jaas.context` + + +### Enabling security + + + + Key to set if the registry is secure. Turning it on + changes the permissions policy from "open access" + to restrictions on kerberos with the option of + a user adding one or more auth key pairs down their + own tree. + + hadoop.registry.secure + false + + +### Identifying the client JAAS context + +The registry clients must identify the JAAS context which they use +to authenticate to the registry. + + + + Key to define the JAAS context. Used in secure + mode + + hadoop.registry.jaas.context + Client + + +*Note* as the Resource Manager is simply another client of the registry, it +too must have this context defined. + + +### Identifying the system accounts `hadoop.registry.system.acls` + +These are the the accounts which are given full access to the base of the +registry. The Resource Manager needs this option to create the root paths. + +Clients also need this option, so that the registry client can give those +same accounts access to the nodes it creates. (This is a requirement of the +ZK security model) + +1. The property `hadoop.registry.system.acls` takes a comma separate list +of zookeeper `ACLs` which are given full access to created nodes; the permissions +`READ | WRITE | CREATE | DELETE | ADMIN`. +1. Any zookeeper ACL scheme may be added to this, such as the `digest:` scheme. +1. The SASL scheme, `sasl:` is used to identify which callers identified +by sasl have full access. These are the superuser accounts. +1. They may be identified by elements such as `sasl:yarn@REALM.COM`. +1. To aid portability the setting, any sasl entry without the realm value —that +is any entry that terminates in the "@" symbol— has the current realm appended +to it. +1. This realm is set to that of the current user. +1. It may be overridden by the property `hadoop.registry.kerberos.realm`. + + + + A comma separated list of Zookeeper ACL identifiers with + system access to the registry in a secure cluster. + + These are given full access to all entries. 
+ + If there is an "@" at the end of a SASL entry it + instructs the registry client to append the default kerberos domain. + + hadoop.registry.system.acls + sasl:yarn@, sasl:mapred@, sasl:mapred@hdfs@ + + + + + The kerberos realm: used to set the realm of + system principals which do not declare their realm, + and any other accounts that need the value. + + If empty, the default realm of the running process + is used. + + If neither are known and the realm is needed, then the registry + service/client will fail. + + hadoop.registry.kerberos.realm + + + +## Zookeeper connection management options + +Some low level options manage the ZK connection —more specifically, its failure +handling. + +The Zookeeper registry clients use Apache Curator to connect to Zookeeper, +a library which detects timeouts and attempts to reconnect to one of the +servers which forms the zookeeper quorum. It is only after a timeout is detected +that a retry is triggered. + + + + + Zookeeper session timeout in milliseconds + + hadoop.registry.zk.session.timeout.ms + 60000 + + + + + Zookeeper connection timeout in milliseconds + + hadoop.registry.zk.connection.timeout.ms + 15000 + + + + + Zookeeper connection retry count before failing + + hadoop.registry.zk.retry.times + 5 + + + + + + hadoop.registry.zk.retry.interval.ms + 1000 + + + + + Zookeeper retry limit in milliseconds, during + exponential backoff: {@value} + + This places a limit even + if the retry times and interval limit, combined + with the backoff policy, result in a long retry + period + + hadoop.registry.zk.retry.ceiling.ms + 60000 + + +The retry strategy used in the registry client is +[`BoundedExponentialBackoffRetry`](https://curator.apache.org/apidocs/org/apache/curator/retry/BoundedExponentialBackoffRetry.html): +This backs off exponentially on connection failures, before eventually +concluding that the quorum is unreachable and failing. + +## Complete Set of Configuration Options + + + + + + + Is the registry enabled: does the RM start it up, + create the user and system paths, and purge + service records when containers, application attempts + and applications complete + + hadoop.registry.rm.enabled + false + + + + + List of hostname:port pairs defining the + zookeeper quorum binding for the registry + + hadoop.registry.zk.quorum + localhost:2181 + + + + + The root zookeeper node for the registry + + hadoop.registry.zk.root + /registry + + + + + + Key to set if the registry is secure. Turning it on + changes the permissions policy from "open access" + to restrictions on kerberos with the option of + a user adding one or more auth key pairs down their + own tree. + + hadoop.registry.secure + false + + + + + A comma separated list of Zookeeper ACL identifiers with + system access to the registry in a secure cluster. + + These are given full access to all entries. + + If there is an "@" at the end of a SASL entry it + instructs the registry client to append the default kerberos domain. + + hadoop.registry.system.acls + sasl:yarn@, sasl:mapred@, sasl:mapred@hdfs@ + + + + + The kerberos realm: used to set the realm of + system principals which do not declare their realm, + and any other accounts that need the value. + + If empty, the default realm of the running process + is used. + + If neither are known and the realm is needed, then the registry + service/client will fail. + + hadoop.registry.kerberos.realm + + + + + + Key to define the JAAS context. 
Used in secure + mode + + hadoop.registry.jaas.context + Client + + + + + + Zookeeper session timeout in milliseconds + + hadoop.registry.zk.session.timeout.ms + 60000 + + + + + Zookeeper session timeout in milliseconds + + hadoop.registry.zk.connection.timeout.ms + 15000 + + + + + Zookeeper connection retry count before failing + + hadoop.registry.zk.retry.times + 5 + + + + + + hadoop.registry.zk.retry.interval.ms + 1000 + + + + + Zookeeper retry limit in milliseconds, during + exponential backoff: {@value} + + This places a limit even + if the retry times and interval limit, combined + with the backoff policy, result in a long retry + period + + hadoop.registry.zk.retry.ceiling.ms + 60000 + + \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/registry/registry-security.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/registry/registry-security.md index 7278534..6317681 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/registry/registry-security.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/registry/registry-security.md @@ -24,8 +24,8 @@ This document is therefore relevant only to secure clusters. ## Security Model The security model of the registry is designed to meet the following goals -a secur -1. Deliver functional security on e ZK installation. +a secure registry: +1. Deliver functional security on a secure ZK installation. 1. Allow the RM to create per-user regions of the registration space 1. Allow applications belonging to a user to write registry entries into their part of the space. These may be short-lived or long-lived